예제 #1
0
        public static void TestExceptionThrownForUnclippedAlignment()
        {
            // Each entry is { reference, query }. Both alignments are expected to be rejected by
            // LeftAlignIndelsAndCallVariants with a FormatException: the first has a gap in the
            // last query column, the second has a gap in the third query column.
            string[][] unclippedPairs =
            {
                new[] { "ACAATATA", "ACAATAT-" },
                new[] { "AAACAATATA", "AA-CAATATA" }
            };

            foreach (string[] pair in unclippedPairs)
            {
                var reference = new Sequence(DnaAlphabet.Instance, pair[0]);
                var query     = new Sequence(DnaAlphabet.Instance, pair[1]);

                // Wrap the two sequences in a single-entry pairwise alignment.
                var alignment = new PairwiseSequenceAlignment(reference, query);
                alignment.Add(new PairwiseAlignedSequence
                {
                    FirstSequence  = reference,
                    SecondSequence = query
                });

                Assert.Throws <FormatException> (() => VariantCaller.LeftAlignIndelsAndCallVariants(alignment, true));
            }
        }
예제 #2
0
        public static void TestLeftAlignmentStep()
        {
            // Input alignment: three gaps in the query (deletions) and one in the reference (insertion).
            const string inputReference = "ACAATAAAAGCGCGCGCGCGTTACGTATAT--ATGGATAT";
            const string inputQuery     = "ACAATAA-AGC--GCGC--GTTACGTATATATATGGATAT";

            var reference = new Sequence(DnaAlphabet.Instance, inputReference);
            var query     = new Sequence(DnaAlphabet.Instance, inputQuery);
            var alignment = new PairwiseSequenceAlignment(reference, query);

            alignment.Add(new PairwiseAlignedSequence
            {
                FirstSequence  = reference,
                SecondSequence = query
            });

            var result = VariantCaller.LeftAlignIndelsAndCallVariants(alignment, true);

            // Part 1: the returned alignment must have every indel shifted as far left as possible.
            alignment = result.Item1 as PairwiseSequenceAlignment;
            var actualReference = alignment.PairwiseAlignedSequences [0].FirstSequence.ConvertToString();
            var actualQuery     = alignment.PairwiseAlignedSequences [0].SecondSequence.ConvertToString();

            const string expectedReference = "ACAATAAAAGCGCGCGCGCGTTACG--TATATATGGATAT";
            const string expectedQuery     = "ACAAT-AAA----GCGCGCGTTACGTATATATATGGATAT";

            Assert.AreEqual(expectedReference, actualReference);
            Assert.AreEqual(expectedQuery, actualQuery);

            // Part 2: the variants called from the left-aligned result.
            var calledVariants = result.Item2;

            Assert.AreEqual(3, calledVariants.Count);

            // Expected properties of the three indels, stored as parallel arrays indexed by variant.
            int[]       expectedStarts   = { 4, 8, 24 };
            int[]       expectedLengths  = { 1, 4, 2 };
            string[]    expectedBases    = { "A", "GCGC", "TA" };
            char[]      expectedHpBases  = { 'A', 'G', 'T' };
            bool[]      expectedInHp     = { true, false, false };
            IndelType[] expectedKinds    = { IndelType.Deletion, IndelType.Deletion, IndelType.Insertion };

            for (int idx = 0; idx < expectedStarts.Length; idx++)
            {
                Assert.AreEqual(VariantType.INDEL, calledVariants [idx].Type);

                var indel = calledVariants [idx] as IndelVariant;
                Assert.AreEqual(expectedHpBases [idx], indel.HomopolymerBase);
                Assert.AreEqual(expectedStarts [idx], indel.StartPosition);
                Assert.AreEqual(expectedLengths [idx], indel.Length);
                Assert.AreEqual(expectedBases [idx], indel.InsertedOrDeletedBases);
                Assert.AreEqual(expectedInHp [idx], indel.InHomopolymer);
                Assert.AreEqual(expectedKinds [idx], indel.InsertionOrDeletion);
            }
        }
예제 #3
0
        public void SequenceAlignmentAddSequence()
        {
            // Purpose: build a PairwiseAlignedSequence from two sequences configured in the test XML,
            // add it to a PairwiseSequenceAlignment, and verify that both sequences can be read back
            // unchanged through PairwiseAlignedSequences.

            // Read the two input sequences and the alphabet name from the XML configuration.
            string origSequence1 = this.utilityObj.xmlUtil.GetTextValue(Constants.AlignAlgorithmNodeName, Constants.SequenceNode1);
            string origSequence2 = this.utilityObj.xmlUtil.GetTextValue(Constants.AlignAlgorithmNodeName, Constants.SequenceNode2);

            IAlphabet alphabet = Utility.GetAlphabet(this.utilityObj.xmlUtil.GetTextValue(Constants.AlignAlgorithmNodeName, Constants.AlphabetNameNode));

            ApplicationLog.WriteLine(string.Format(null, "SequenceAlignment BVT : First sequence used is '{0}'.", origSequence1));
            ApplicationLog.WriteLine(string.Format(null, "SequenceAlignment BVT : Second sequence used is '{0}'.", origSequence2));

            // Create two sequences
            ISequence aInput = new Sequence(alphabet, origSequence1);
            ISequence bInput = new Sequence(alphabet, origSequence2);

            // Wrap the pair in an aligned-sequence object and add it to a new alignment.
            IList <IPairwiseSequenceAlignment> sequenceAlignmentObj =
                new List <IPairwiseSequenceAlignment>();

            var alignSeq = new PairwiseAlignedSequence {
                FirstSequence = aInput, SecondSequence = bInput
            };
            IPairwiseSequenceAlignment seqAlignObj = new PairwiseSequenceAlignment();

            seqAlignObj.Add(alignSeq);
            sequenceAlignmentObj.Add(seqAlignObj);

            // Read the output back and validate the same.
            IList <PairwiseAlignedSequence> newAlignedSequences = sequenceAlignmentObj[0].PairwiseAlignedSequences;
            string readSequence1 = newAlignedSequences[0].FirstSequence.ConvertToString();
            string readSequence2 = newAlignedSequences[0].SecondSequence.ConvertToString();

            // Log what was actually read back (not the inputs) so the log is useful when diagnosing a failure.
            ApplicationLog.WriteLine(string.Format(null, "SequenceAlignment BVT : First sequence read is '{0}'.", readSequence1));
            ApplicationLog.WriteLine(string.Format(null, "SequenceAlignment BVT : Second sequence read is '{0}'.", readSequence2));

            // Expected value goes first so a failure message labels the two values correctly.
            Assert.AreEqual(origSequence1, readSequence1);
            Assert.AreEqual(origSequence2, readSequence2);
        }
예제 #4
0
        /// <summary>
        /// Validates the exceptions raised by the general methods of the sequence alignment class.
        /// A pairwise alignment is built from the sequences configured under <paramref name="nodeName"/>,
        /// marked read-only, and then the selected method is invoked; the message of the resulting
        /// exception is compared with the expected message stored in the XML.
        /// </summary>
        /// <param name="nodeName">Node Name in the xml.</param>
        /// <param name="methodName">Name of the SequenceAlignment method to be validated.
        /// Add, Clear, Remove and AddSequence are expected to raise NotSupportedException;
        /// GetObjectData is called with a null info argument and is expected to raise
        /// ArgumentNullException. Any other value performs no validation.</param>
        static void InValidateSequenceAlignmentGeneralMethods(string nodeName,
                                                              SeqAlignmentMethods methodName)
        {
            // Read the xml file for getting both the files for aligning.
            string origSequence1 = Utility._xmlUtil.GetTextValue(nodeName,
                                                                 Constants.SequenceNode1);
            string origSequence2 = Utility._xmlUtil.GetTextValue(nodeName,
                                                                 Constants.SequenceNode2);
            IAlphabet alphabet = Utility.GetAlphabet(Utility._xmlUtil.GetTextValue(nodeName,
                                                                                   Constants.AlphabetNameNode));
            string readOnlyException = Utility._xmlUtil.GetTextValue(nodeName,
                                                                     Constants.ReadOnlyExceptionNode);
            string expectedGetObjectDataException = Utility._xmlUtil.GetTextValue(nodeName,
                                                                                  Constants.GetObjectDataNullErrorMessageNode);
            string           actualError = null;
            StreamingContext context     = new StreamingContext(StreamingContextStates.All);

            // Create two sequences
            ISequence aInput = new Sequence(alphabet, origSequence1);
            ISequence bInput = new Sequence(alphabet, origSequence2);

            // Add the sequences to the Sequence alignment object using AddSequence() method.
            IList <IPairwiseSequenceAlignment> sequenceAlignmentObj =
                new List <IPairwiseSequenceAlignment>();

            PairwiseAlignedSequence alignSeq = new PairwiseAlignedSequence();

            alignSeq.FirstSequence  = aInput;
            alignSeq.SecondSequence = bInput;
            PairwiseSequenceAlignment seqAlignObj = new PairwiseSequenceAlignment(aInput, bInput);

            seqAlignObj.Add(alignSeq);
            sequenceAlignmentObj.Add(seqAlignObj);

            // Set SequenceAlignment IsReadOnly property to true.
            seqAlignObj.IsReadOnly = true;
            IList <PairwiseAlignedSequence> newAlignedSequences =
                sequenceAlignmentObj[0].PairwiseAlignedSequences;

            // In every case below, actualError stays null when the expected exception is not raised,
            // which makes the following assertion fail.
            switch (methodName)
            {
            case SeqAlignmentMethods.Add:
                try
                {
                    seqAlignObj.Add(newAlignedSequences[0]);
                }
                catch (NotSupportedException ex)
                {
                    actualError = ex.Message;
                }
                // Validate Error message
                Assert.AreEqual(readOnlyException, actualError);
                break;

            case SeqAlignmentMethods.Clear:
                try
                {
                    seqAlignObj.Clear();
                }
                catch (NotSupportedException ex)
                {
                    actualError = ex.Message;
                }
                // Validate Error message
                Assert.AreEqual(readOnlyException, actualError);
                break;

            case SeqAlignmentMethods.Remove:
                try
                {
                    seqAlignObj.Remove(newAlignedSequences[0]);
                }
                catch (NotSupportedException ex)
                {
                    actualError = ex.Message;
                }

                // Validate Error message
                Assert.AreEqual(readOnlyException, actualError);
                break;

            case SeqAlignmentMethods.AddSequence:
                try
                {
                    seqAlignObj.AddSequence(newAlignedSequences[0]);
                }
                catch (NotSupportedException ex)
                {
                    actualError = ex.Message;
                }
                // Validate Error message
                Assert.AreEqual(readOnlyException, actualError);
                break;

            case SeqAlignmentMethods.GetObjectData:
                try
                {
                    seqAlignObj.GetObjectData(null, context);
                }
                catch (ArgumentNullException ex)
                {
                    actualError = ex.Message;
                }
                // Fail with a clear assertion (rather than a NullReferenceException on Replace below)
                // when GetObjectData did not throw.
                Assert.IsNotNull(actualError, "GetObjectData(null, context) did not throw ArgumentNullException.");
                // Validate Error message; line breaks are stripped before comparing.
                Assert.AreEqual(expectedGetObjectDataException,
                                actualError.Replace("\r", "").Replace("\n", ""));
                break;

            default:
                break;
            }

            ApplicationLog.WriteLine("SequenceAlignment P2 : Successfully validated the IsRead Property");
            Console.WriteLine("SequenceAlignment P2 : Successfully validated the IsRead Property");
        }
예제 #5
0
        /// <summary>
        /// Validates Sequence Alignment test cases for the parameters passed.
        /// Two sequences are created from the XML configuration (optionally converted to lower or
        /// upper case), the first is added to a new pairwise alignment, and then either the
        /// sequences are read back and compared with the inputs, or, if any step threw, the
        /// exception message is compared with the expected error message from the XML.
        /// </summary>
        /// <param name="nodeName">Node Name in the xml.</param>
        /// <param name="offset">Offset. The read-back comparison is only performed for
        /// <c>OffsetValidation.Default</c>; other values skip it.</param>
        /// <param name="caseType">Case type applied to the input sequences before they are created.</param>
        /// <param name="type">Sequence type, forwarded to GetInputSequencesWithSequenceType.</param>
        static void ValidateGeneralSequenceAlignment(string nodeName,
                                                     OffsetValidation offset, SequenceCaseType caseType, SequenceType type)
        {
            // Read the xml file for getting both the files for aligning.
            string firstInputSequence  = string.Empty;
            string secondInputSequence = string.Empty;

            IAlphabet alphabet = Utility.GetAlphabet(Utility._xmlUtil.GetTextValue(nodeName,
                                                                                   Constants.AlphabetNameNode));

            GetInputSequencesWithSequenceType(nodeName, type, out firstInputSequence,
                                              out secondInputSequence);

            ApplicationLog.WriteLine(string.Format(null,
                                                   "SequenceAlignment P2 : First sequence used is '{0}'.", firstInputSequence));
            ApplicationLog.WriteLine(string.Format(null,
                                                   "SequenceAlignment P2 : Second sequence used is '{0}'.", secondInputSequence));

            Console.WriteLine(string.Format(null,
                                            "SequenceAlignment P2 : First sequence used is '{0}'.", firstInputSequence));
            Console.WriteLine(string.Format(null,
                                            "SequenceAlignment P2 : Second sequence used is '{0}'.", secondInputSequence));

            // Create two sequences
            ISequence aInput          = null;
            ISequence bInput          = null;
            Exception actualException = null;

            switch (caseType)
            {
            case SequenceCaseType.LowerCase:
                // Sequence symbols are data, not display text: use the invariant culture so the
                // conversion does not depend on the machine locale (e.g. Turkish dotted/dotless I).
                aInput = new Sequence(alphabet, firstInputSequence.ToLower(CultureInfo.InvariantCulture));
                bInput = new Sequence(alphabet, secondInputSequence.ToLower(CultureInfo.InvariantCulture));

                break;

            case SequenceCaseType.UpperCase:
                aInput = new Sequence(alphabet, firstInputSequence.ToUpper(CultureInfo.InvariantCulture));
                bInput = new Sequence(alphabet, secondInputSequence.ToUpper(CultureInfo.InvariantCulture));
                break;

            case SequenceCaseType.Default:
                // Only this case tolerates a failure while creating the sequences; the exception
                // is validated against the expected error message further down.
                try
                {
                    aInput = new Sequence(alphabet, firstInputSequence);
                    bInput = new Sequence(alphabet, secondInputSequence);
                }
                catch (Exception ex)
                {
                    actualException = ex;
                }
                break;
            }

            // Add the sequences to the Sequence alignment object using AddSequence() method.
            IList <IPairwiseSequenceAlignment> sequenceAlignmentObj = new List <IPairwiseSequenceAlignment>();

            try
            {
                PairwiseAlignedSequence alignSeq = new PairwiseAlignedSequence();
                alignSeq.FirstSequence = aInput;
                IPairwiseSequenceAlignment seqAlignObj = new PairwiseSequenceAlignment();
                seqAlignObj.Add(alignSeq);
                sequenceAlignmentObj.Add(seqAlignObj);
            }
            catch (Exception ex)
            {
                actualException = ex;
            }

            if (actualException == null)
            {
                if (offset == OffsetValidation.Default)
                {
                    // The second sequence is attached after the aligned pair has been added.
                    sequenceAlignmentObj[0].PairwiseAlignedSequences[0].SecondSequence = bInput;

                    // Read the output back and validate the same.
                    IList <PairwiseAlignedSequence> newAlignedSequences = sequenceAlignmentObj[0].PairwiseAlignedSequences;
                    string firstReadSequence  = newAlignedSequences[0].FirstSequence.ToString();
                    string secondReadSequence = newAlignedSequences[0].SecondSequence.ToString();

                    // Log the values actually read back rather than repeating the inputs.
                    ApplicationLog.WriteLine(string.Format(null,
                                                           "SequenceAlignment P2 : First sequence read is '{0}'.", firstReadSequence));
                    ApplicationLog.WriteLine(string.Format(null,
                                                           "SequenceAlignment P2 : Second sequence read is '{0}'.", secondReadSequence));

                    Console.WriteLine(string.Format(null,
                                                    "SequenceAlignment P2 : First sequence read is '{0}'.", firstReadSequence));
                    Console.WriteLine(string.Format(null,
                                                    "SequenceAlignment P2 : Second sequence read is '{0}'.", secondReadSequence));

                    // Expected value goes first so a failure message labels the two values correctly.
                    Assert.AreEqual(firstInputSequence, firstReadSequence);
                    Assert.AreEqual(secondInputSequence, secondReadSequence);
                }
            }
            else
            {
                // Validate that expected exception is thrown using error message.
                string expectedErrorMessage = Utility._xmlUtil.GetTextValue(nodeName,
                                                                            Constants.ExpectedErrorMessage);
                Assert.AreEqual(expectedErrorMessage, actualException.Message);

                // The format string needs a {0} placeholder, otherwise the message argument is silently dropped.
                ApplicationLog.WriteLine(string.Format(null,
                                                       "SequenceAlignment P2 : Expected Error message is thrown {0}", expectedErrorMessage));

                Console.WriteLine(string.Format(null,
                                                "SequenceAlignment P2 : Expected Error message is thrown {0}", expectedErrorMessage));
            }
        }
예제 #6
0
        /// <summary>
        /// Given a pairwise alignment, shift the gaps so that all indels start as early
        /// (as far left) as possible.  For example:
        ///
        /// <code>
        /// TTTTAAAATTTT  -> Converts to ->  TTTTAAAATTTT
        /// TTTTAA--TTTT                     TTTT--AATTTT
        /// </code>
        ///
        /// This function takes a IPairwiseSequenceAlignment and assumes that the first sequence is the reference and second
        /// sequence is the query.  It returns a new Pairwise sequence alignment with all of the indels left aligned as well as a list of variants.
        /// The input sequences are copied before being shifted.
        /// </summary>
        /// <param name="aln">Aln. Must contain exactly one pairwise aligned sequence. The second sequence should be of type QualitativeSequence or Sequence</param>
        /// <param name="callVariants">callVariants.  If true, it will call variants, otherwise the second half of tuple will be null. </param>
        /// <returns>A tuple of the new left-aligned alignment and the list of called variants
        /// (null when <paramref name="callVariants"/> is false).</returns>
        /// <exception cref="NullReferenceException">Thrown if <paramref name="aln"/> or either of its aligned sequences is null.</exception>
        /// <exception cref="ArgumentException">Thrown if the alignment does not hold exactly one aligned pair,
        /// or if the query is neither a Sequence nor a QualitativeSequence.</exception>
        public static Tuple <IPairwiseSequenceAlignment, List <Variant> > LeftAlignIndelsAndCallVariants(IPairwiseSequenceAlignment aln, bool callVariants = true)
        {
            // NOTE(review): ArgumentNullException would be the conventional type for these guards;
            // NullReferenceException is kept so existing callers that catch it keep working.
            if (aln == null)
            {
                throw new NullReferenceException("aln");
            }
            if (aln.PairwiseAlignedSequences == null || aln.PairwiseAlignedSequences.Count != 1)
            {
                throw new ArgumentException("The pairwise aligned sequence should only have one alignment");
            }
            var frstAln = aln.PairwiseAlignedSequences.First();
            var seq1    = frstAln.FirstSequence;
            var seq2    = frstAln.SecondSequence;

            if (seq1 == null)
            {
                throw new NullReferenceException("seq1");
            }
            else if (seq2 == null)
            {
                throw new NullReferenceException("seq2");
            }

            //TODO: Might implement an ambiguity check later.
            #if FALSE
            if (seq1.Alphabet.HasAmbiguity || seq2.Alphabet.HasAmbiguity)
            {
                throw new ArgumentException("Cannot left align sequences with ambiguous symbols.");
            }
            #endif

            // Note we have to copy unless we can guarantee the array will not be mutated.
            byte[]         refseq = seq1.ToArray();
            ISequence      newQuery;
            List <Variant> variants = null;
            // Call variants for a qualitative sequence
            if (seq2 is QualitativeSequence)
            {
                var qs    = seq2 as QualitativeSequence;
                // Pair every base with its quality score so both move together during the shift.
                var query = Enumerable.Zip(qs, qs.GetQualityScores(), (bp, qv) => new BPandQV(bp, (byte)qv, false)).ToArray();
                AlignmentUtils.LeftAlignIndels(refseq, query);
                // Presumably rejects alignments with gaps at either end — TODO confirm against AlignmentUtils.
                AlignmentUtils.VerifyNoGapsOnEnds(refseq, query);
                if (callVariants)
                {
                    variants = VariantCaller.CallVariants(refseq, query, seq2.IsMarkedAsReverseComplement());
                }
                var newQueryQS = new QualitativeSequence(qs.Alphabet,
                                                         qs.FormatType,
                                                         query.Select(z => z.BP).ToArray(),
                                                         query.Select(p => p.QV).ToArray(),
                                                         false);
                newQueryQS.Metadata = seq2.Metadata;
                newQuery            = newQueryQS;
            }
            else if (seq2 is Sequence)      // For a sequence with no QV values.
            {
                var qs    = seq2 as Sequence;
                // No quality values are available, so every base gets a quality of 0.
                var query = qs.Select(v => new BPandQV(v, 0, false)).ToArray();
                AlignmentUtils.LeftAlignIndels(refseq, query);
                // Presumably rejects alignments with gaps at either end — TODO confirm against AlignmentUtils.
                AlignmentUtils.VerifyNoGapsOnEnds(refseq, query);
                // ISequence does not have a setable metadata
                var newQueryS = new Sequence(qs.Alphabet, query.Select(z => z.BP).ToArray(), false);
                newQueryS.Metadata = seq2.Metadata;
                if (callVariants)
                {
                    variants = VariantCaller.CallVariants(refseq, query, seq2.IsMarkedAsReverseComplement());
                }
                newQuery = newQueryS;
            }
            else
            {
                throw new ArgumentException("Can only left align indels if the query sequence is of type Sequence or QualitativeSequence.");
            }

            // Tag each variant with the reference name. variants is null when callVariants is
            // false, so it must be checked before enumerating.
            if (variants != null && aln.FirstSequence != null && aln.FirstSequence.ID != null)
            {
                foreach (var v in variants)
                {
                    v.RefName = aln.FirstSequence.ID;
                }
            }

            // Build the shifted reference from the mutated copy, carrying over ID and metadata.
            var newRef = new Sequence(seq1.Alphabet, refseq, false);
            newRef.ID       = seq1.ID;
            newRef.Metadata = seq1.Metadata;

            newQuery.ID = seq2.ID;

            // The new alignment keeps the original (unaligned) sequences of the input alignment
            // and holds the left-aligned pair as its single aligned sequence.
            var newaln = new PairwiseSequenceAlignment(aln.FirstSequence, aln.SecondSequence);
            var pas    = new PairwiseAlignedSequence();
            pas.FirstSequence  = newRef;
            pas.SecondSequence = newQuery;
            newaln.Add(pas);
            return(new Tuple <IPairwiseSequenceAlignment, List <Variant> > (newaln, variants));
        }