Ejemplo n.º 1
0
        private void ValidateSmithWatermanAlignment(bool isTextFile, AlignmentParamType alignParam,
                                                    AlignmentType alignType)
        {
            ISequence aInput, bInput;
            IAlphabet alphabet =
                Utility.GetAlphabet(this.utilityObj.xmlUtil.GetTextValue(Constants.SmithWatermanAlignAlgorithmNodeName,
                                                                         Constants.AlphabetNameNode));

            if (isTextFile)
            {
                // Read the xml file for getting both the files for aligning.
                string filePath1 = this.utilityObj.xmlUtil.GetTextValue(Constants.SmithWatermanAlignAlgorithmNodeName,
                                                                        Constants.FilePathNode1);
                string filePath2 = this.utilityObj.xmlUtil.GetTextValue(Constants.SmithWatermanAlignAlgorithmNodeName,
                                                                        Constants.FilePathNode2);

                // Parse the files and get the sequence.
                var parseObjectForFile1 = new FastAParser();
                {
                    parseObjectForFile1.Alphabet = alphabet;
                    aInput = parseObjectForFile1.Parse(filePath1).First();
                }

                var parseObjectForFile2 = new FastAParser();
                {
                    parseObjectForFile2.Alphabet = alphabet;
                    bInput = parseObjectForFile2.Parse(filePath2).First();
                }
            }
            else
            {
                // Read the xml file for getting both the files for aligning.
                string origSequence1 = this.utilityObj.xmlUtil.GetTextValue(Constants.SmithWatermanAlignAlgorithmNodeName,
                                                                            Constants.SequenceNode1);
                string origSequence2 = this.utilityObj.xmlUtil.GetTextValue(Constants.SmithWatermanAlignAlgorithmNodeName,
                                                                            Constants.SequenceNode2);
                aInput = new Sequence(alphabet, origSequence1);
                bInput = new Sequence(alphabet, origSequence2);
            }

            string blosumFilePath = this.utilityObj.xmlUtil.GetTextValue(Constants.SmithWatermanAlignAlgorithmNodeName,
                                                                         Constants.BlosumFilePathNode);

            var sm          = new SimilarityMatrix(new StreamReader(blosumFilePath));
            int gapOpenCost =
                int.Parse(
                    this.utilityObj.xmlUtil.GetTextValue(Constants.SmithWatermanAlignAlgorithmNodeName,
                                                         Constants.GapOpenCostNode), null);
            int gapExtensionCost =
                int.Parse(
                    this.utilityObj.xmlUtil.GetTextValue(Constants.SmithWatermanAlignAlgorithmNodeName,
                                                         Constants.GapExtensionCostNode), null);

            var smithWatermanObj = new SmithWatermanAligner();

            if (AlignmentParamType.AllParam != alignParam)
            {
                smithWatermanObj.SimilarityMatrix = sm;
                smithWatermanObj.GapOpenCost      = gapOpenCost;
            }

            IList <IPairwiseSequenceAlignment> result = null;

            switch (alignParam)
            {
            case AlignmentParamType.AlignList:
                switch (alignType)
                {
                case AlignmentType.Align:
                    result = smithWatermanObj.Align(new List <ISequence> {
                        aInput, bInput
                    });
                    break;

                default:
                    result = smithWatermanObj.AlignSimple(new List <ISequence> {
                        aInput, bInput
                    });
                    break;
                }
                break;

            case AlignmentParamType.AlignTwo:
                switch (alignType)
                {
                case AlignmentType.Align:
                    result = smithWatermanObj.Align(aInput, bInput);
                    break;

                default:
                    result = smithWatermanObj.AlignSimple(aInput, bInput);
                    break;
                }
                break;

            case AlignmentParamType.AllParam:
                switch (alignType)
                {
                case AlignmentType.Align:
                    result = smithWatermanObj.Align(sm, gapOpenCost,
                                                    gapExtensionCost, aInput, bInput);
                    break;

                default:
                    result = smithWatermanObj.AlignSimple(sm, gapOpenCost, aInput, bInput);
                    break;
                }
                break;

            default:
                break;
            }

            // Read the xml file for getting both the files for aligning.
            string expectedSequence1, expectedSequence2, expectedScore;

            switch (alignType)
            {
            case AlignmentType.Align:
                expectedScore = this.utilityObj.xmlUtil.GetTextValue(
                    Constants.SmithWatermanAlignAlgorithmNodeName,
                    Constants.ExpectedGapExtensionScoreNode);
                expectedSequence1 = this.utilityObj.xmlUtil.GetTextValue(
                    Constants.SmithWatermanAlignAlgorithmNodeName,
                    Constants.ExpectedGapExtensionSequence1Node);
                expectedSequence2 = this.utilityObj.xmlUtil.GetTextValue(
                    Constants.SmithWatermanAlignAlgorithmNodeName,
                    Constants.ExpectedGapExtensionSequence2Node);
                break;

            default:
                expectedScore = this.utilityObj.xmlUtil.GetTextValue(
                    Constants.SmithWatermanAlignAlgorithmNodeName,
                    Constants.ExpectedScoreNode);
                expectedSequence1 = this.utilityObj.xmlUtil.GetTextValue(
                    Constants.SmithWatermanAlignAlgorithmNodeName,
                    Constants.ExpectedSequenceNode1);
                expectedSequence2 = this.utilityObj.xmlUtil.GetTextValue(
                    Constants.SmithWatermanAlignAlgorithmNodeName,
                    Constants.ExpectedSequenceNode2);
                break;
            }

            IList <IPairwiseSequenceAlignment> expectedOutput = new List <IPairwiseSequenceAlignment>();

            IPairwiseSequenceAlignment align = new PairwiseSequenceAlignment();
            var alignedSeq = new PairwiseAlignedSequence
            {
                FirstSequence  = new Sequence(alphabet, expectedSequence1),
                SecondSequence = new Sequence(alphabet, expectedSequence2),
                Score          = Convert.ToInt32(expectedScore, null),
                FirstOffset    = Int32.MinValue,
                SecondOffset   = Int32.MinValue,
            };

            align.PairwiseAlignedSequences.Add(alignedSeq);
            expectedOutput.Add(align);

            ApplicationLog.WriteLine(string.Format(null, "SmithWatermanAligner BVT : Final Score '{0}'.", expectedScore));
            ApplicationLog.WriteLine(string.Format(null, "SmithWatermanAligner BVT : Aligned First Sequence is '{0}'.",
                                                   expectedSequence1));
            ApplicationLog.WriteLine(string.Format(null, "SmithWatermanAligner BVT : Aligned Second Sequence is '{0}'.",
                                                   expectedSequence2));

            Assert.IsTrue(CompareAlignment(result, expectedOutput));
        }
Ejemplo n.º 2
0
        public void TestNUCmer3MultipleReferencesAndQueries()
        {
            Sequence         referenceSeq  = null;
            Sequence         searchSeq     = null;
            List <ISequence> referenceSeqs = null;
            List <ISequence> searchSeqs    = null;

            referenceSeqs = new List <ISequence>();

            string reference = "ATGCGCATCCCC";

            referenceSeq    = new Sequence(Alphabets.DNA, reference);
            referenceSeq.ID = "R1";
            referenceSeqs.Add(referenceSeq);

            reference       = "TAGCT";
            referenceSeq    = new Sequence(Alphabets.DNA, reference);
            referenceSeq.ID = "R11";
            referenceSeqs.Add(referenceSeq);

            searchSeqs = new List <ISequence>();

            string search = "CCGCGCCCCCTC";

            searchSeq    = new Sequence(Alphabets.DNA, search);
            searchSeq.ID = "Q1";
            searchSeqs.Add(searchSeq);

            search       = "AGCT";
            searchSeq    = new Sequence(Alphabets.DNA, search);
            searchSeq.ID = "Q11";
            searchSeqs.Add(searchSeq);

            NucmerPairwiseAligner nucmer = new NucmerPairwiseAligner();

            nucmer.FixedSeparation  = 0;
            nucmer.MinimumScore     = 2;
            nucmer.SeparationFactor = -1;
            nucmer.LengthOfMUM      = 3;
            IList <IPairwiseSequenceAlignment> result = nucmer.Align(referenceSeqs, searchSeqs).Select(a => a as IPairwiseSequenceAlignment).ToList();

            // Check if output is not null
            Assert.AreNotEqual(null, result);

            IList <IPairwiseSequenceAlignment> expectedOutput = new List <IPairwiseSequenceAlignment>();
            IPairwiseSequenceAlignment         align          = new PairwiseSequenceAlignment();
            PairwiseAlignedSequence            alignedSeq     = new PairwiseAlignedSequence();

            alignedSeq.FirstSequence  = new Sequence(Alphabets.DNA, "GCGCATCCCC");
            alignedSeq.SecondSequence = new Sequence(Alphabets.DNA, "GCGC--CCCC");
            alignedSeq.Consensus      = new Sequence(Alphabets.DNA, "GCGCATCCCC");
            alignedSeq.Score          = -5;
            alignedSeq.FirstOffset    = 0;
            alignedSeq.SecondOffset   = 0;
            align.PairwiseAlignedSequences.Add(alignedSeq);
            expectedOutput.Add(align);
            align      = new PairwiseSequenceAlignment();
            alignedSeq = new PairwiseAlignedSequence();
            alignedSeq.FirstSequence  = new Sequence(Alphabets.DNA, "AGCT");
            alignedSeq.SecondSequence = new Sequence(Alphabets.DNA, "AGCT");
            alignedSeq.Consensus      = new Sequence(Alphabets.DNA, "AGCT");
            alignedSeq.Score          = 12;
            alignedSeq.FirstOffset    = 0;
            alignedSeq.SecondOffset   = 1;
            align.PairwiseAlignedSequences.Add(alignedSeq);
            expectedOutput.Add(align);
            Assert.IsTrue(CompareAlignment(result, expectedOutput));
        }
Ejemplo n.º 3
0
        public void TestNUCmer3CustomBreakLength()
        {
            Sequence         referenceSeq  = null;
            Sequence         searchSeq     = null;
            List <ISequence> referenceSeqs = null;
            List <ISequence> searchSeqs    = null;

            referenceSeqs = new List <ISequence>();

            string reference = "CAAAAGGGATTGCAAATGTTGGAGTGAATGCCATTACCTACCGGCTAGGAGGAGT";

            referenceSeq    = new Sequence(Alphabets.DNA, reference);
            referenceSeq.ID = "R1";
            referenceSeqs.Add(referenceSeq);

            reference       = "CCCCCCCCC";
            referenceSeq    = new Sequence(Alphabets.DNA, reference);
            referenceSeq.ID = "R2";
            referenceSeqs.Add(referenceSeq);

            reference       = "TTTTT";
            referenceSeq    = new Sequence(Alphabets.DNA, reference);
            referenceSeq.ID = "R3";
            referenceSeqs.Add(referenceSeq);

            searchSeqs = new List <ISequence>();

            string search = "CATTAATGATAAAGGGAAAGAAGTCCTCGTGCTATGGGGCATTCACCATCCATCTACTAGTGCTGACCAA";

            searchSeq    = new Sequence(Alphabets.DNA, search);
            searchSeq.ID = "Q1";
            searchSeqs.Add(searchSeq);

            search       = "CAAAGTCTCTATCAGAATGCAGATGCAGATGTTTTTGTGGGGTCATCAAGATATAGCAAGAAGTTCAAGC";
            searchSeq    = new Sequence(Alphabets.DNA, search);
            searchSeq.ID = "Q2";
            searchSeqs.Add(searchSeq);

            search       = "AAGCAAAATTAAACAGAGAAGAAATAGATGGGGTAAAGCTGGAATCAACAAGGATTTACCAGATTTTGGC";
            searchSeq    = new Sequence(Alphabets.DNA, search);
            searchSeq.ID = "Q3";
            searchSeqs.Add(searchSeq);

            NucmerPairwiseAligner nucmer = new NucmerPairwiseAligner();

            nucmer.MaximumSeparation = 0;
            nucmer.MinimumScore      = 2;
            nucmer.SeparationFactor  = 0.12F;
            nucmer.LengthOfMUM       = 5;
            nucmer.BreakLength       = 2;
            IList <IPairwiseSequenceAlignment> result = nucmer.Align(referenceSeqs, searchSeqs).Select(a => a as IPairwiseSequenceAlignment).ToList();

            // Check if output is not null
            Assert.AreNotEqual(null, result);

            List <IPairwiseSequenceAlignment> expectedOutput = new List <IPairwiseSequenceAlignment>();
            IPairwiseSequenceAlignment        align          = new PairwiseSequenceAlignment();
            PairwiseAlignedSequence           alignedSeq     = new PairwiseAlignedSequence();

            alignedSeq.FirstSequence  = new Sequence(Alphabets.DNA, "AAAGGGA");
            alignedSeq.SecondSequence = new Sequence(Alphabets.DNA, "AAAGGGA");
            alignedSeq.Consensus      = new Sequence(Alphabets.DNA, "AAAGGGA");
            alignedSeq.Score          = 21;
            alignedSeq.FirstOffset    = 8;
            alignedSeq.SecondOffset   = 0;
            align.PairwiseAlignedSequences.Add(alignedSeq);
            alignedSeq = new PairwiseAlignedSequence();
            alignedSeq.FirstSequence  = new Sequence(Alphabets.DNA, "CATTA");
            alignedSeq.SecondSequence = new Sequence(Alphabets.DNA, "CATTA");
            alignedSeq.Consensus      = new Sequence(Alphabets.DNA, "CATTA");
            alignedSeq.Score          = 15;
            alignedSeq.FirstOffset    = 0;
            alignedSeq.SecondOffset   = 31;
            align.PairwiseAlignedSequences.Add(alignedSeq);
            expectedOutput.Add(align);

            align      = new PairwiseSequenceAlignment();
            alignedSeq = new PairwiseAlignedSequence();
            alignedSeq.FirstSequence  = new Sequence(Alphabets.DNA, "ATGTT");
            alignedSeq.SecondSequence = new Sequence(Alphabets.DNA, "ATGTT");
            alignedSeq.Consensus      = new Sequence(Alphabets.DNA, "ATGTT");
            alignedSeq.Score          = 15;
            alignedSeq.FirstOffset    = 13;
            alignedSeq.SecondOffset   = 0;
            align.PairwiseAlignedSequences.Add(alignedSeq);
            alignedSeq = new PairwiseAlignedSequence();
            alignedSeq.FirstSequence  = new Sequence(Alphabets.DNA, "GAATGC");
            alignedSeq.SecondSequence = new Sequence(Alphabets.DNA, "GAATGC");
            alignedSeq.Consensus      = new Sequence(Alphabets.DNA, "GAATGC");
            alignedSeq.Score          = 18;
            alignedSeq.FirstOffset    = 0;
            alignedSeq.SecondOffset   = 11;
            align.PairwiseAlignedSequences.Add(alignedSeq);
            alignedSeq = new PairwiseAlignedSequence();
            alignedSeq.FirstSequence  = new Sequence(Alphabets.DNA, "TTTTT");
            alignedSeq.SecondSequence = new Sequence(Alphabets.DNA, "TTTTT");
            alignedSeq.Consensus      = new Sequence(Alphabets.DNA, "TTTTT");
            alignedSeq.Score          = 15;
            alignedSeq.FirstOffset    = 31;
            alignedSeq.SecondOffset   = 0;
            align.PairwiseAlignedSequences.Add(alignedSeq);
            expectedOutput.Add(align);

            align      = new PairwiseSequenceAlignment();
            alignedSeq = new PairwiseAlignedSequence();
            alignedSeq.FirstSequence  = new Sequence(Alphabets.DNA, "CAAAA");
            alignedSeq.SecondSequence = new Sequence(Alphabets.DNA, "CAAAA");
            alignedSeq.Consensus      = new Sequence(Alphabets.DNA, "CAAAA");
            alignedSeq.Score          = 15;
            alignedSeq.FirstOffset    = 3;
            alignedSeq.SecondOffset   = 0;
            align.PairwiseAlignedSequences.Add(alignedSeq);
            alignedSeq = new PairwiseAlignedSequence();
            alignedSeq.FirstSequence  = new Sequence(Alphabets.DNA, "GGATT");
            alignedSeq.SecondSequence = new Sequence(Alphabets.DNA, "GGATT");
            alignedSeq.Consensus      = new Sequence(Alphabets.DNA, "GGATT");
            alignedSeq.Score          = 15;
            alignedSeq.FirstOffset    = 45;
            alignedSeq.SecondOffset   = 0;
            align.PairwiseAlignedSequences.Add(alignedSeq);
            alignedSeq = new PairwiseAlignedSequence();
            alignedSeq.FirstSequence  = new Sequence(Alphabets.DNA, "GCAAA");
            alignedSeq.SecondSequence = new Sequence(Alphabets.DNA, "GCAAA");
            alignedSeq.Consensus      = new Sequence(Alphabets.DNA, "GCAAA");
            alignedSeq.Score          = 15;
            alignedSeq.FirstOffset    = 0;
            alignedSeq.SecondOffset   = 9;
            align.PairwiseAlignedSequences.Add(alignedSeq);
            alignedSeq = new PairwiseAlignedSequence();
            alignedSeq.FirstSequence  = new Sequence(Alphabets.DNA, "TTACC");
            alignedSeq.SecondSequence = new Sequence(Alphabets.DNA, "TTACC");
            alignedSeq.Consensus      = new Sequence(Alphabets.DNA, "TTACC");
            alignedSeq.Score          = 15;
            alignedSeq.FirstOffset    = 22;
            alignedSeq.SecondOffset   = 0;
            align.PairwiseAlignedSequences.Add(alignedSeq);
            expectedOutput.Add(align);
            Assert.IsTrue(CompareAlignment(result, expectedOutput));
        }
Ejemplo n.º 4
0
        public void TestNUCmer3CustomBreakLength()
        {
            var referenceSeqs = new List <ISequence>
            {
                new Sequence(Alphabets.DNA, "CAAAAGGGATTGCAAATGTTGGAGTGAATGCCATTACCTACCGGCTAGGAGGAGT")
                {
                    ID = "R1"
                },
                new Sequence(Alphabets.DNA, "CCCCCCCCC")
                {
                    ID = "R2"
                },
                new Sequence(Alphabets.DNA, "TTTTT")
                {
                    ID = "R3"
                },
            };

            var searchSeqs = new List <ISequence>
            {
                new Sequence(Alphabets.DNA, "CATTAATGATAAAGGGAAAGAAGTCCTCGTGCTATGGGGCATTCACCATCCATCTACTAGTGCTGACCAA")
                {
                    ID = "Q1"
                },
                new Sequence(Alphabets.DNA, "CAAAGTCTCTATCAGAATGCAGATGCAGATGTTTTTGTGGGGTCATCAAGATATAGCAAGAAGTTCAAGC")
                {
                    ID = "Q2"
                },
                new Sequence(Alphabets.DNA, "AAGCAAAATTAAACAGAGAAGAAATAGATGGGGTAAAGCTGGAATCAACAAGGATTTACCAGATTTTGGC")
                {
                    ID = "Q3"
                },
            };

            NucmerPairwiseAligner nucmer = new NucmerPairwiseAligner
            {
                MaximumSeparation = 0,
                MinimumScore      = 2,
                SeparationFactor  = 0.12F,
                LengthOfMUM       = 5,
                BreakLength       = 2,
                ForwardOnly       = true
            };

            var result = nucmer.Align(referenceSeqs, searchSeqs)
                         .Select(a => a as IPairwiseSequenceAlignment)
                         .ToList();

            // Check if output is not null
            Assert.IsNotNull(result);

            var expectedOutput = new List <IPairwiseSequenceAlignment>();

            IPairwiseSequenceAlignment align = new PairwiseSequenceAlignment();

            align.PairwiseAlignedSequences.Add(new PairwiseAlignedSequence
            {
                FirstSequence  = new Sequence(Alphabets.DNA, "AAAGGGA"),
                SecondSequence = new Sequence(Alphabets.DNA, "AAAGGGA"),
                Consensus      = new Sequence(Alphabets.DNA, "AAAGGGA"),
                Score          = 21,
                FirstOffset    = 8,
                SecondOffset   = 0
            });
            align.PairwiseAlignedSequences.Add(new PairwiseAlignedSequence
            {
                FirstSequence  = new Sequence(Alphabets.DNA, "CATTA"),
                SecondSequence = new Sequence(Alphabets.DNA, "CATTA"),
                Consensus      = new Sequence(Alphabets.DNA, "CATTA"),
                Score          = 15,
                FirstOffset    = 0,
                SecondOffset   = 31
            });
            expectedOutput.Add(align);

            align = new PairwiseSequenceAlignment();
            align.PairwiseAlignedSequences.Add(new PairwiseAlignedSequence
            {
                FirstSequence  = new Sequence(Alphabets.DNA, "ATGTT"),
                SecondSequence = new Sequence(Alphabets.DNA, "ATGTT"),
                Consensus      = new Sequence(Alphabets.DNA, "ATGTT"),
                Score          = 15,
                FirstOffset    = 13,
                SecondOffset   = 0
            });
            align.PairwiseAlignedSequences.Add(new PairwiseAlignedSequence
            {
                FirstSequence  = new Sequence(Alphabets.DNA, "GAATGC"),
                SecondSequence = new Sequence(Alphabets.DNA, "GAATGC"),
                Consensus      = new Sequence(Alphabets.DNA, "GAATGC"),
                Score          = 18,
                FirstOffset    = 0,
                SecondOffset   = 11
            });
            align.PairwiseAlignedSequences.Add(new PairwiseAlignedSequence
            {
                FirstSequence  = new Sequence(Alphabets.DNA, "TTTTT"),
                SecondSequence = new Sequence(Alphabets.DNA, "TTTTT"),
                Consensus      = new Sequence(Alphabets.DNA, "TTTTT"),
                Score          = 15,
                FirstOffset    = 31,
                SecondOffset   = 0
            });
            expectedOutput.Add(align);

            align = new PairwiseSequenceAlignment();
            align.PairwiseAlignedSequences.Add(new PairwiseAlignedSequence
            {
                FirstSequence  = new Sequence(Alphabets.DNA, "CAAAA"),
                SecondSequence = new Sequence(Alphabets.DNA, "CAAAA"),
                Consensus      = new Sequence(Alphabets.DNA, "CAAAA"),
                Score          = 15,
                FirstOffset    = 3,
                SecondOffset   = 0
            });
            align.PairwiseAlignedSequences.Add(new PairwiseAlignedSequence
            {
                FirstSequence  = new Sequence(Alphabets.DNA, "GGATT"),
                SecondSequence = new Sequence(Alphabets.DNA, "GGATT"),
                Consensus      = new Sequence(Alphabets.DNA, "GGATT"),
                Score          = 15,
                FirstOffset    = 45,
                SecondOffset   = 0
            });
            align.PairwiseAlignedSequences.Add(new PairwiseAlignedSequence
            {
                FirstSequence  = new Sequence(Alphabets.DNA, "GCAAA"),
                SecondSequence = new Sequence(Alphabets.DNA, "GCAAA"),
                Consensus      = new Sequence(Alphabets.DNA, "GCAAA"),
                Score          = 15,
                FirstOffset    = 0,
                SecondOffset   = 9
            });
            align.PairwiseAlignedSequences.Add(new PairwiseAlignedSequence
            {
                FirstSequence  = new Sequence(Alphabets.DNA, "TTACC"),
                SecondSequence = new Sequence(Alphabets.DNA, "TTACC"),
                Consensus      = new Sequence(Alphabets.DNA, "TTACC"),
                Score          = 15,
                FirstOffset    = 22,
                SecondOffset   = 0
            });
            expectedOutput.Add(align);

            Assert.IsTrue(AlignmentHelpers.CompareAlignment(result, expectedOutput));
        }
Ejemplo n.º 5
0
        /// <summary>
        ///     Validates PairwiseOverlapAlignment algorithm for the parameters passed.
        /// </summary>
        /// <param name="isTextFile">Is text file an input.</param>
        /// <param name="alignParam">parameter based on which certain validations are done.</param>
        /// <param name="alignType">Is the Align type Simple or Align with Gap Extension cost?</param>
        private void ValidatePairwiseOverlapAlignment(bool isTextFile, AlignmentParamType alignParam, AlignmentType alignType)
        {
            ISequence aInput;
            ISequence bInput;

            IAlphabet alphabet = Utility.GetAlphabet(this.utilityObj.xmlUtil.GetTextValue(Constants.PairwiseOverlapAlignAlgorithmNodeName, Constants.AlphabetNameNode));

            if (isTextFile)
            {
                // Read the xml file for getting both the files for aligning.
                string filePath1 = this.utilityObj.xmlUtil.GetTextValue(Constants.PairwiseOverlapAlignAlgorithmNodeName, Constants.FilePathNode1);
                string filePath2 = this.utilityObj.xmlUtil.GetTextValue(Constants.PairwiseOverlapAlignAlgorithmNodeName, Constants.FilePathNode2);

                //Parse the files and get the sequence.
                var parser = new FastAParser {
                    Alphabet = alphabet
                };
                aInput = parser.Parse(filePath1).ElementAt(0);
                bInput = parser.Parse(filePath2).ElementAt(0);
            }
            else
            {
                // Read the xml file for getting both the files for aligning.
                string origSequence1 = this.utilityObj.xmlUtil.GetTextValue(Constants.PairwiseOverlapAlignAlgorithmNodeName, Constants.SequenceNode1);
                string origSequence2 = this.utilityObj.xmlUtil.GetTextValue(Constants.PairwiseOverlapAlignAlgorithmNodeName, Constants.SequenceNode2);
                aInput = new Sequence(alphabet, origSequence1);
                bInput = new Sequence(alphabet, origSequence2);
            }

            var aInputString = aInput.ConvertToString();
            var bInputString = bInput.ConvertToString();

            ApplicationLog.WriteLine(string.Format(null, "PairwiseOverlapAligner BVT : First sequence used is '{0}'.", aInputString));
            ApplicationLog.WriteLine(string.Format(null, "PairwiseOverlapAligner BVT : Second sequence used is '{0}'.", bInputString));

            string blosumFilePath = this.utilityObj.xmlUtil.GetTextValue(Constants.PairwiseOverlapAlignAlgorithmNodeName, Constants.BlosumFilePathNode);

            var sm               = new SimilarityMatrix(new StreamReader(blosumFilePath));
            int gapOpenCost      = int.Parse(this.utilityObj.xmlUtil.GetTextValue(Constants.PairwiseOverlapAlignAlgorithmNodeName, Constants.GapOpenCostNode), null);
            int gapExtensionCost = int.Parse(this.utilityObj.xmlUtil.GetTextValue(Constants.PairwiseOverlapAlignAlgorithmNodeName, Constants.GapExtensionCostNode), null);

            var pairwiseOverlapObj = new PairwiseOverlapAligner();

            if (AlignmentParamType.AllParam != alignParam)
            {
                pairwiseOverlapObj.SimilarityMatrix = sm;
                pairwiseOverlapObj.GapOpenCost      = gapOpenCost;
            }

            IList <IPairwiseSequenceAlignment> result = null;

            switch (alignParam)
            {
            case AlignmentParamType.AlignList:
                var sequences = new List <ISequence> {
                    aInput, bInput
                };
                switch (alignType)
                {
                case AlignmentType.Align:
                    result = pairwiseOverlapObj.Align(sequences);
                    break;

                default:
                    result = pairwiseOverlapObj.AlignSimple(sequences);
                    break;
                }
                break;

            case AlignmentParamType.AlignTwo:
                switch (alignType)
                {
                case AlignmentType.Align:
                    result = pairwiseOverlapObj.Align(aInput, bInput);
                    break;

                default:
                    result = pairwiseOverlapObj.AlignSimple(aInput, bInput);
                    break;
                }
                break;

            case AlignmentParamType.AllParam:
                switch (alignType)
                {
                case AlignmentType.Align:
                    result = pairwiseOverlapObj.Align(sm, gapOpenCost,
                                                      gapExtensionCost, aInput, bInput);
                    break;

                default:
                    result = pairwiseOverlapObj.AlignSimple(sm, gapOpenCost, aInput, bInput);
                    break;
                }
                break;

            default:
                break;
            }

            // Read the xml file for getting both the files for aligning.
            string expectedSequence1;
            string expectedSequence2;
            string expectedScore;

            switch (alignType)
            {
            case AlignmentType.Align:
                expectedScore     = this.utilityObj.xmlUtil.GetTextValue(Constants.PairwiseOverlapAlignAlgorithmNodeName, Constants.ExpectedGapExtensionScoreNode);
                expectedSequence1 = this.utilityObj.xmlUtil.GetTextValue(Constants.PairwiseOverlapAlignAlgorithmNodeName, Constants.ExpectedGapExtensionSequence1Node);
                expectedSequence2 = this.utilityObj.xmlUtil.GetTextValue(Constants.PairwiseOverlapAlignAlgorithmNodeName, Constants.ExpectedGapExtensionSequence2Node);
                break;

            default:
                expectedScore     = this.utilityObj.xmlUtil.GetTextValue(Constants.PairwiseOverlapAlignAlgorithmNodeName, Constants.ExpectedScoreNode);
                expectedSequence1 = this.utilityObj.xmlUtil.GetTextValue(Constants.PairwiseOverlapAlignAlgorithmNodeName, Constants.ExpectedSequenceNode1);
                expectedSequence2 = this.utilityObj.xmlUtil.GetTextValue(Constants.PairwiseOverlapAlignAlgorithmNodeName, Constants.ExpectedSequenceNode2);
                break;
            }

            IList <IPairwiseSequenceAlignment> expectedOutput = new List <IPairwiseSequenceAlignment>();
            var seperators = new[] { ';' };

            string[] expectedSequences1 = expectedSequence1.Split(seperators);
            string[] expectedSequences2 = expectedSequence2.Split(seperators);

            IPairwiseSequenceAlignment align = new PairwiseSequenceAlignment();

            for (int i = 0; i < expectedSequences1.Length; i++)
            {
                PairwiseAlignedSequence alignedSeq = new PairwiseAlignedSequence
                {
                    FirstSequence  = new Sequence(alphabet, expectedSequences1[i]),
                    SecondSequence = new Sequence(alphabet, expectedSequences2[i]),
                    Score          = Convert.ToInt32(expectedScore, null)
                };
                align.PairwiseAlignedSequences.Add(alignedSeq);
            }

            expectedOutput.Add(align);
            Assert.IsTrue(AlignmentHelpers.CompareAlignment(result, expectedOutput));

            ApplicationLog.WriteLine(string.Format(null, "PairwiseOverlapAligner BVT : Final Score '{0}'.", expectedScore));
            ApplicationLog.WriteLine(string.Format(null, "PairwiseOverlapAligner BVT : Aligned First Sequence is '{0}'.", expectedSequence1));
            ApplicationLog.WriteLine(string.Format(null, "PairwiseOverlapAligner BVT : Aligned Second Sequence is '{0}'.", expectedSequence2));
        }
Ejemplo n.º 6
0
        void ValidateMUMmerAlignGeneralTestCases(string nodeName)
        {
            ISequence referenceSeq                = null;
            IEnumerable <ISequence> querySeqs     = null;
            IEnumerable <ISequence> referenceSeqs = null;

            // Gets the reference sequence from the configurtion file
            string filePath = utilityObj.xmlUtil.GetTextValue(nodeName,
                                                              Constants.FilePathNode);

            Assert.IsNotNull(filePath);
            ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                                                   "MUMmer P2 : Successfully validated the File Path '{0}'.",
                                                   filePath));

            FastAParser fastaParserObj = new FastAParser(filePath);

            referenceSeqs = fastaParserObj.Parse();

            referenceSeq = referenceSeqs.ElementAt(0);

            // Gets the reference sequence from the configurtion file
            string queryFilePath = utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                   Constants.SearchSequenceFilePathNode);

            Assert.IsNotNull(queryFilePath);
            ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                                                   "MUMmer P2 : Successfully validated the Search File Path '{0}'.",
                                                   queryFilePath));

            FastAParser fastaParserObj1 = new FastAParser(queryFilePath);

            querySeqs = fastaParserObj1.Parse();

            string mumLength = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MUMAlignLengthNode);

            MUMmerAligner mum = new MUMmerAligner();

            mum.LengthOfMUM = long.Parse(mumLength, null);
            mum.StoreMUMs   = true;

            mum.PairWiseAlgorithm = new NeedlemanWunschAligner();
            mum.GapOpenCost       =
                int.Parse(utilityObj.xmlUtil.GetTextValue(nodeName, Constants.GapOpenCostNode),
                          (IFormatProvider)null);
            IList <IPairwiseSequenceAlignment> align = null;

            align = mum.Align(referenceSeq, querySeqs);

            // Validate FinalMUMs and MUMs Properties.
            Assert.IsNotNull(mum.MUMs);

            string expectedScore = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ScoreNodeName);

            string[] expectedSequences =
                utilityObj.xmlUtil.GetTextValues(nodeName, Constants.ExpectedSequencesNode);
            IList <IPairwiseSequenceAlignment> expectedOutput = new List <IPairwiseSequenceAlignment>();

            IPairwiseSequenceAlignment seqAlign   = new PairwiseSequenceAlignment();
            PairwiseAlignedSequence    alignedSeq = new PairwiseAlignedSequence();

            alignedSeq.FirstSequence  = new Sequence(referenceSeq.Alphabet, expectedSequences[0]);
            alignedSeq.SecondSequence = new Sequence(referenceSeq.Alphabet, expectedSequences[1]);
            alignedSeq.Score          = Convert.ToInt32(expectedScore, (IFormatProvider)null);
            seqAlign.PairwiseAlignedSequences.Add(alignedSeq);
            expectedOutput.Add(seqAlign);
            Assert.IsTrue(CompareAlignment(align, expectedOutput));

            Console.WriteLine("MUMmer P2 : Successfully validated the aligned sequences.");
            ApplicationLog.WriteLine("MUMmer P2 : Successfully validated the aligned sequences.");
        }
Ejemplo n.º 7
0
        /// <summary>
        /// This method is considered as main execute method which defines the
        /// step by step algorithm. Derived class flows the defined flow by this
        /// method. Store generated MUMs in properties MUMs, SortedMUMs.
        /// Alignment first finds MUMs for all the query sequence, and then
        /// runs pairwise algorithm on gaps to produce alignments.
        /// </summary>
        /// <param name="referenceSequence">Reference sequence.</param>
        /// <param name="querySequenceList">List of input sequences.</param>
        /// <returns>A list of sequence alignments.</returns>
        private IList <IPairwiseSequenceAlignment> AlignmentWithAccumulatedMUMs(
            ISequence referenceSequence,
            IEnumerable <ISequence> querySequenceList)
        {
            // Get MUMs
            this.mums = new Dictionary <ISequence, IEnumerable <Match> >();
            IList <IPairwiseSequenceAlignment> results   = new List <IPairwiseSequenceAlignment>();
            IPairwiseSequenceAlignment         alignment = null;
            IEnumerable <Match> mum;

            if (this.Validate(referenceSequence, querySequenceList))
            {
                // Safety check for public methods to ensure that null
                // inputs are handled.
                if (referenceSequence == null || querySequenceList == null)
                {
                    return(null);
                }

                Sequence seq = referenceSequence as Sequence;
                if (seq == null)
                {
                    throw new ArgumentException(Properties.Resource.OnlySequenceClassSupported);
                }

                MUMmer mummer = new MUMmer(seq);
                mummer.LengthOfMUM = this.LengthOfMUM;
                mummer.NoAmbiguity = this.AmbigiousMatchesAllowed;
                foreach (ISequence sequence in querySequenceList)
                {
                    if (sequence.Equals(referenceSequence))
                    {
                        continue;
                    }

                    alignment = new PairwiseSequenceAlignment(referenceSequence, sequence);

                    // Step2 : streaming process is performed with the query sequence
                    if (this.MaximumMatchEnabled)
                    {
                        mum = mummer.GetMatches(sequence);
                    }
                    else
                    {
                        mum = mummer.GetMatchesUniqueInReference(sequence);
                    }

                    this.mums.Add(sequence, mum);

                    // Step3(a) : sorted mum list based on reference sequence
                    LongestIncreasingSubsequence lis = new LongestIncreasingSubsequence();
                    IList <Match> sortedMumList      = lis.SortMum(GetMumsForLIS(mum));

                    if (sortedMumList.Count > 0)
                    {
                        // Step3(b) : LIS using greedy cover algorithm
                        IList <Match> finalMumList = lis.GetLongestSequence(sortedMumList);

                        if (finalMumList.Count > 0)
                        {
                            // Step 4 : get all the gaps in each sequence and call
                            // pairwise alignment
                            alignment.PairwiseAlignedSequences.Add(
                                this.ProcessGaps(referenceSequence, sequence, finalMumList));
                        }

                        results.Add(alignment);
                    }
                    else
                    {
                        IList <IPairwiseSequenceAlignment> sequenceAlignment = this.RunPairWise(
                            referenceSequence,
                            sequence);

                        foreach (IPairwiseSequenceAlignment pairwiseAlignment in sequenceAlignment)
                        {
                            results.Add(pairwiseAlignment);
                        }
                    }
                }
            }

            return(results);
        }
Ejemplo n.º 8
0
        public void TestMUMmer3MultipleMumWithCustomMatrix()
        {
            string reference = "ATGCGCATCCCCTT";
            string search    = "GCGCCCCCTA";

            Sequence referenceSeq = null;
            Sequence searchSeq    = null;

            referenceSeq = new Sequence(Alphabets.DNA, reference);
            searchSeq    = new Sequence(Alphabets.DNA, search);

            List <ISequence> searchSeqs = new List <ISequence>();

            searchSeqs.Add(searchSeq);

            int[,] customMatrix = new int[256, 256];

            customMatrix[(byte)'A', (byte)'A'] = 3;
            customMatrix[(byte)'A', (byte)'T'] = -2;
            customMatrix[(byte)'A', (byte)'G'] = -2;
            customMatrix[(byte)'A', (byte)'c'] = -2;

            customMatrix[(byte)'G', (byte)'G'] = 3;
            customMatrix[(byte)'G', (byte)'A'] = -2;
            customMatrix[(byte)'G', (byte)'T'] = -2;
            customMatrix[(byte)'G', (byte)'C'] = -2;

            customMatrix[(byte)'T', (byte)'T'] = 3;
            customMatrix[(byte)'T', (byte)'A'] = -2;
            customMatrix[(byte)'T', (byte)'G'] = -2;
            customMatrix[(byte)'T', (byte)'C'] = -2;

            customMatrix[(byte)'C', (byte)'C'] = 3;
            customMatrix[(byte)'C', (byte)'T'] = -2;
            customMatrix[(byte)'C', (byte)'A'] = -2;
            customMatrix[(byte)'C', (byte)'G'] = -2;

            DiagonalSimilarityMatrix matrix = new DiagonalSimilarityMatrix(3, -2);

            int gapOpenCost = -6;

            MUMmerAligner mummer = new MUMmerAligner();

            mummer.LengthOfMUM       = 4;
            mummer.PairWiseAlgorithm = new NeedlemanWunschAligner();
            mummer.SimilarityMatrix  = matrix;
            mummer.GapOpenCost       = gapOpenCost;
            mummer.GapExtensionCost  = -2;

            IList <IPairwiseSequenceAlignment> result = mummer.AlignSimple(referenceSeq, searchSeqs);

            // Check if output is not null
            Assert.AreNotEqual(null, result);

            IList <IPairwiseSequenceAlignment> expectedOutput = new List <IPairwiseSequenceAlignment>();
            IPairwiseSequenceAlignment         align          = new PairwiseSequenceAlignment();
            PairwiseAlignedSequence            alignedSeq     = new PairwiseAlignedSequence();

            alignedSeq.FirstSequence  = new Sequence(Alphabets.DNA, "ATGCGCATCCCCTT");
            alignedSeq.SecondSequence = new Sequence(Alphabets.DNA, "--GCGC--CCCCTA");
            alignedSeq.Consensus      = new Sequence(AmbiguousDnaAlphabet.Instance, "ATGCGCATCCCCTW");
            alignedSeq.Score          = 1;
            alignedSeq.FirstOffset    = 0;
            alignedSeq.SecondOffset   = 2;
            align.PairwiseAlignedSequences.Add(alignedSeq);
            expectedOutput.Add(align);
            Assert.IsTrue(CompareAlignment(result, expectedOutput));
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Given two byte arrays representing a pairwise alignment, shift them so
        /// that all deletions start as early as possible.  For example:
        ///
        /// <code>
        /// TTTTAAAATTTT  -> Converts to ->  TTTTAAAATTTT
        /// TTTTAA--TTTT                     TTTT--AATTTT
        /// </code>
        ///
        /// This function takes a IPairwiseSequenceAlignment and assumes that the first sequence is the reference and second
        /// sequence is the query.  It returns a new Pairwise sequence alignment with all of the indels left aligned as well as a list of variants.
        /// </summary>
        /// <param name="aln">Aln. The second sequence should be of type QualitativeSequence or Sequence</param>
        /// <param name="callVariants">callVariants.  If true, it will call variants, otherwise the second half of tuple will be null. </param>
        public static Tuple <IPairwiseSequenceAlignment, List <Variant> > LeftAlignIndelsAndCallVariants(IPairwiseSequenceAlignment aln, bool callVariants = true)
        {
            if (aln == null)
            {
                throw new NullReferenceException("aln");
            }
            if (aln.PairwiseAlignedSequences == null || aln.PairwiseAlignedSequences.Count != 1)
            {
                throw new ArgumentException("The pairwise aligned sequence should only have one alignment");
            }
            var frstAln = aln.PairwiseAlignedSequences.First();
            var seq1    = frstAln.FirstSequence;
            var seq2    = frstAln.SecondSequence;

            if (seq1 == null)
            {
                throw new NullReferenceException("seq1");
            }
            else if (seq2 == null)
            {
                throw new NullReferenceException("seq2");
            }

            //TODO: Might implement an ambiguity check later.
            #if FALSE
            if (seq1.Alphabet.HasAmbiguity || seq2.Alphabet.HasAmbiguity)
            {
                throw new ArgumentException("Cannot left align sequences with ambiguous symbols.");
            }
            #endif

            // Note we have to copy unless we can guarantee the array will not be mutated.
            byte[]         refseq = seq1.ToArray();
            ISequence      newQuery;
            List <Variant> variants = null;
            // Call variants for a qualitative sequence
            if (seq2 is QualitativeSequence)
            {
                var qs    = seq2 as QualitativeSequence;
                var query = Enumerable.Zip(qs, qs.GetQualityScores(), (bp, qv) => new BPandQV(bp, (byte)qv, false)).ToArray();
                AlignmentUtils.LeftAlignIndels(refseq, query);
                AlignmentUtils.VerifyNoGapsOnEnds(refseq, query);
                if (callVariants)
                {
                    variants = VariantCaller.CallVariants(refseq, query, seq2.IsMarkedAsReverseComplement());
                }
                var newQueryQS = new QualitativeSequence(qs.Alphabet,
                                                         qs.FormatType,
                                                         query.Select(z => z.BP).ToArray(),
                                                         query.Select(p => p.QV).ToArray(),
                                                         false);
                newQueryQS.Metadata = seq2.Metadata;
                newQuery            = newQueryQS;
            }
            else if (seq2 is Sequence)      // For a sequence with no QV values.
            {
                var qs    = seq2 as Sequence;
                var query = qs.Select(v => new BPandQV(v, 0, false)).ToArray();
                AlignmentUtils.LeftAlignIndels(refseq, query);
                AlignmentUtils.VerifyNoGapsOnEnds(refseq, query);
                // ISequence does not have a setable metadata
                var newQueryS = new Sequence(qs.Alphabet, query.Select(z => z.BP).ToArray(), false);
                newQueryS.Metadata = seq2.Metadata;
                if (callVariants)
                {
                    variants = VariantCaller.CallVariants(refseq, query, seq2.IsMarkedAsReverseComplement());
                }
                newQuery = newQueryS;
            }
            else
            {
                throw new ArgumentException("Can only left align indels if the query sequence is of type Sequence or QualitativeSequence.");
            }

            if (aln.FirstSequence != null && aln.FirstSequence.ID != null)
            {
                foreach (var v in variants)
                {
                    v.RefName = aln.FirstSequence.ID;
                }
            }

            var newRef = new Sequence(seq1.Alphabet, refseq, false);
            newRef.ID       = seq1.ID;
            newRef.Metadata = seq1.Metadata;

            newQuery.ID = seq2.ID;

            var newaln = new PairwiseSequenceAlignment(aln.FirstSequence, aln.SecondSequence);
            var pas    = new PairwiseAlignedSequence();
            pas.FirstSequence  = newRef;
            pas.SecondSequence = newQuery;
            newaln.Add(pas);
            return(new Tuple <IPairwiseSequenceAlignment, List <Variant> > (newaln, variants));
        }
Ejemplo n.º 10
0
        /// <summary>
        /// Validates PairwiseOverlapAlignment algorithm for the parameters passed.
        /// </summary>
        /// <param name="isTextFile">Is text file an input.</param>
        /// <param name="alignParam">parameter based on which certain validations are done.</param>
        /// <param name="alignType">Is the Align type Simple or Align with Gap Extension cost?</param>
        void ValidatePairwiseOverlapAlignment(bool isTextFile, AlignmentParamType alignParam,
                                              AlignmentType alignType, bool IsUseEARTHToFillMatrix)
        {
            ISequence aInput = null;
            ISequence bInput = null;

            IAlphabet alphabet = Utility.GetAlphabet(utilityObj.xmlUtil.GetTextValue(
                                                         Constants.PairwiseOverlapAlignAlgorithmNodeName,
                                                         Constants.AlphabetNameNode));

            if (isTextFile)
            {
                // Read the xml file for getting both the files for aligning.
                string filePath1 = utilityObj.xmlUtil.GetTextValue(
                    Constants.PairwiseOverlapAlignAlgorithmNodeName,
                    Constants.FilePathNode1);
                string filePath2 = utilityObj.xmlUtil.GetTextValue(
                    Constants.PairwiseOverlapAlignAlgorithmNodeName,
                    Constants.FilePathNode2);

                //Parse the files and get the sequence.

                using (FastAParser parser1 = new FastAParser(filePath1))
                {
                    parser1.Alphabet = alphabet;
                    aInput           = parser1.Parse().ElementAt(0);
                }

                using (FastAParser parser2 = new FastAParser(filePath2))
                {
                    parser2.Alphabet = alphabet;
                    bInput           = parser2.Parse().ElementAt(0);
                }
            }
            else
            {
                // Read the xml file for getting both the files for aligning.
                string origSequence1 = utilityObj.xmlUtil.GetTextValue(
                    Constants.PairwiseOverlapAlignAlgorithmNodeName,
                    Constants.SequenceNode1);
                string origSequence2 = utilityObj.xmlUtil.GetTextValue(
                    Constants.PairwiseOverlapAlignAlgorithmNodeName,
                    Constants.SequenceNode2);
                aInput = new Sequence(alphabet, origSequence1);
                bInput = new Sequence(alphabet, origSequence2);
            }

            string aInputString = new string(aInput.Select(a => (char)a).ToArray());
            string bInputString = new string(bInput.Select(a => (char)a).ToArray());


            ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                                                   "PairwiseOverlapAligner BVT : First sequence used is '{0}'.",
                                                   aInputString));
            ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                                                   "PairwiseOverlapAligner BVT : Second sequence used is '{0}'.",
                                                   bInputString));

            Console.WriteLine(string.Format((IFormatProvider)null,
                                            "PairwiseOverlapAligner BVT : First sequence used is '{0}'.",
                                            aInputString));
            Console.WriteLine(string.Format((IFormatProvider)null,
                                            "PairwiseOverlapAligner BVT : Second sequence used is '{0}'.",
                                            bInputString));

            string blosumFilePath = utilityObj.xmlUtil.GetTextValue(
                Constants.PairwiseOverlapAlignAlgorithmNodeName,
                Constants.BlosumFilePathNode);

            SimilarityMatrix sm = new SimilarityMatrix(blosumFilePath);
            int gapOpenCost     = int.Parse(utilityObj.xmlUtil.GetTextValue(
                                                Constants.PairwiseOverlapAlignAlgorithmNodeName,
                                                Constants.GapOpenCostNode), (IFormatProvider)null);

            int gapExtensionCost = int.Parse(utilityObj.xmlUtil.GetTextValue(
                                                 Constants.PairwiseOverlapAlignAlgorithmNodeName,
                                                 Constants.GapExtensionCostNode), (IFormatProvider)null);

            PairwiseOverlapAligner pairwiseOverlapObj = new PairwiseOverlapAligner();

            if (AlignmentParamType.AllParam != alignParam)
            {
                pairwiseOverlapObj.SimilarityMatrix = sm;
                pairwiseOverlapObj.GapOpenCost      = gapOpenCost;
            }
            if (IsUseEARTHToFillMatrix)
            {
                pairwiseOverlapObj.UseEARTHToFillMatrix = true;
            }
            IList <IPairwiseSequenceAlignment> result = null;

            switch (alignParam)
            {
            case AlignmentParamType.AlignList:
                List <ISequence> sequences = new List <ISequence>();
                sequences.Add(aInput);
                sequences.Add(bInput);
                switch (alignType)
                {
                case AlignmentType.Align:
                    result = pairwiseOverlapObj.Align(sequences);
                    break;

                default:
                    result = pairwiseOverlapObj.AlignSimple(sequences);
                    break;
                }
                break;

            case AlignmentParamType.AlignTwo:
                switch (alignType)
                {
                case AlignmentType.Align:
                    result = pairwiseOverlapObj.Align(aInput, bInput);
                    break;

                default:
                    result = pairwiseOverlapObj.AlignSimple(aInput, bInput);
                    break;
                }
                break;

            case AlignmentParamType.AllParam:
                switch (alignType)
                {
                case AlignmentType.Align:
                    result = pairwiseOverlapObj.Align(sm, gapOpenCost,
                                                      gapExtensionCost, aInput, bInput);
                    break;

                default:
                    result = pairwiseOverlapObj.AlignSimple(sm, gapOpenCost, aInput, bInput);
                    break;
                }
                break;

            default:
                break;
            }

            pairwiseOverlapObj = null;
            aInput             = null;
            bInput             = null;
            sm = null;

            // Read the xml file for getting both the files for aligning.
            string expectedSequence1 = string.Empty;
            string expectedSequence2 = string.Empty;
            string expectedScore     = string.Empty;

            aInput = null;
            bInput = null;
            sm     = null;

            switch (alignType)
            {
            case AlignmentType.Align:
                expectedScore = utilityObj.xmlUtil.GetTextValue(
                    Constants.PairwiseOverlapAlignAlgorithmNodeName,
                    Constants.ExpectedGapExtensionScoreNode);
                expectedSequence1 = utilityObj.xmlUtil.GetTextValue(
                    Constants.PairwiseOverlapAlignAlgorithmNodeName,
                    Constants.ExpectedGapExtensionSequence1Node);
                expectedSequence2 = utilityObj.xmlUtil.GetTextValue(
                    Constants.PairwiseOverlapAlignAlgorithmNodeName,
                    Constants.ExpectedGapExtensionSequence2Node);
                break;

            default:
                expectedScore = utilityObj.xmlUtil.GetTextValue(
                    Constants.PairwiseOverlapAlignAlgorithmNodeName,
                    Constants.ExpectedScoreNode);
                expectedSequence1 = utilityObj.xmlUtil.GetTextValue(
                    Constants.PairwiseOverlapAlignAlgorithmNodeName,
                    Constants.ExpectedSequenceNode1);
                expectedSequence2 = utilityObj.xmlUtil.GetTextValue(
                    Constants.PairwiseOverlapAlignAlgorithmNodeName,
                    Constants.ExpectedSequenceNode2);
                break;
            }

            IList <IPairwiseSequenceAlignment> expectedOutput = new List <IPairwiseSequenceAlignment>();

            string[] expectedSequences1, expectedSequences2;
            char[]   seperators = new char[1] {
                ';'
            };
            expectedSequences1 = expectedSequence1.Split(seperators);
            expectedSequences2 = expectedSequence2.Split(seperators);

            IPairwiseSequenceAlignment align = new PairwiseSequenceAlignment();
            PairwiseAlignedSequence    alignedSeq;

            for (int i = 0; i < expectedSequences1.Length; i++)
            {
                alignedSeq = new PairwiseAlignedSequence();
                alignedSeq.FirstSequence  = new Sequence(alphabet, expectedSequences1[i]);
                alignedSeq.SecondSequence = new Sequence(alphabet, expectedSequences2[i]);
                alignedSeq.Score          = Convert.ToInt32(expectedScore, (IFormatProvider)null);
                align.PairwiseAlignedSequences.Add(alignedSeq);
            }

            expectedOutput.Add(align);
            Assert.IsTrue(CompareAlignment(result, expectedOutput));

            ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                                                   "PairwiseOverlapAligner BVT : Final Score '{0}'.", expectedScore));
            ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                                                   "PairwiseOverlapAligner BVT : Aligned First Sequence is '{0}'.",
                                                   expectedSequence1));
            ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                                                   "PairwiseOverlapAligner BVT : Aligned Second Sequence is '{0}'.",
                                                   expectedSequence2));

            Console.WriteLine(string.Format((IFormatProvider)null,
                                            "PairwiseOverlapAligner BVT : Final Score '{0}'.", expectedScore));
            Console.WriteLine(string.Format((IFormatProvider)null,
                                            "PairwiseOverlapAligner BVT : Aligned First Sequence is '{0}'.",
                                            expectedSequence1));
            Console.WriteLine(string.Format((IFormatProvider)null,
                                            "PairwiseOverlapAligner BVT : Aligned Second Sequence is '{0}'.",
                                            expectedSequence2));
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Validates the Mummer align method for several test cases for the parameters passed.
        /// </summary>
        /// <param name="nodeName">Node name to be read from xml</param>
        /// <param name="isSeqList">Is MUMmer alignment with List of sequences</param>
        void ValidateMUMmerAlignGeneralTestCases(string nodeName, bool isFilePath,
                                                 bool isSeqList)
        {
            ISequence         referenceSeq           = null;
            ISequence         querySeq               = null;
            IList <ISequence> querySeqs              = null;
            string            referenceSequence      = string.Empty;
            string            querySequence          = string.Empty;
            IList <IPairwiseSequenceAlignment> align = null;
            IList <ISequence> referenceSeqs          = null;

            if (isFilePath)
            {
                // Gets the reference sequence from the configurtion file
                string filePath = _utilityObj._xmlUtil.GetTextValue(nodeName,
                                                                    Constants.FilePathNode);

                Assert.IsNotNull(filePath);
                ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                                                       "MUMmer BVT : Successfully validated the File Path '{0}'.", filePath));

                using (FastaParser parser = new FastaParser())
                {
                    referenceSeqs     = parser.Parse(filePath);
                    referenceSeq      = referenceSeqs[0];
                    referenceSequence = referenceSeq.ToString();

                    // Gets the reference sequence from the configurtion file
                    string queryFilePath = _utilityObj._xmlUtil.GetTextValue(nodeName,
                                                                             Constants.SearchSequenceFilePathNode);

                    Assert.IsNotNull(queryFilePath);
                    ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                                                           "MUMmer BVT : Successfully validated the Search File Path '{0}'.", queryFilePath));

                    querySeqs     = parser.Parse(queryFilePath);
                    querySeq      = querySeqs[0];
                    querySequence = querySeq.ToString();
                }
            }
            else
            {
                // Gets the reference sequence from the configurtion file
                referenceSequence = _utilityObj._xmlUtil.GetTextValue(nodeName,
                                                                      Constants.SequenceNode);

                string referenceSeqAlphabet = _utilityObj._xmlUtil.GetTextValue(nodeName,
                                                                                Constants.AlphabetNameNode);

                referenceSeq = new Sequence(Utility.GetAlphabet(referenceSeqAlphabet),
                                            referenceSequence);

                querySequence = _utilityObj._xmlUtil.GetTextValue(nodeName,
                                                                  Constants.SearchSequenceNode);

                referenceSeqAlphabet = _utilityObj._xmlUtil.GetTextValue(nodeName,
                                                                         Constants.SearchSequenceAlphabetNode);

                querySeq = new Sequence(Utility.GetAlphabet(referenceSeqAlphabet),
                                        querySequence);
                querySeqs = new List <ISequence>();
                querySeqs.Add(querySeq);
            }

            string mumLength = _utilityObj._xmlUtil.GetTextValue(nodeName, Constants.MUMAlignLengthNode);

            MUMmer mum = new MUMmer3();

            mum.LengthOfMUM       = long.Parse(mumLength, null);
            mum.PairWiseAlgorithm = new NeedlemanWunschAligner();
            mum.GapOpenCost       = int.Parse(_utilityObj._xmlUtil.GetTextValue(nodeName, Constants.GapOpenCostNode),
                                              (IFormatProvider)null);

            if (isSeqList)
            {
                querySeqs.Add(referenceSeq);
                align = mum.Align(querySeqs);
            }
            else
            {
                align = mum.AlignSimple(referenceSeq, querySeqs);
            }

            string expectedScore = _utilityObj._xmlUtil.GetTextValue(nodeName, Constants.ScoreNodeName);

            Assert.AreEqual(expectedScore,
                            align[0].PairwiseAlignedSequences[0].Score.ToString((IFormatProvider)null));
            Console.WriteLine(string.Format((IFormatProvider)null,
                                            "MUMmer BVT : Successfully validated the score for the sequence '{0}' and '{1}'.",
                                            referenceSequence, querySequence));
            ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                                                   "MUMmer BVT : Successfully validated the score for the sequence '{0}' and '{1}'.",
                                                   referenceSequence, querySequence));

            string[] expectedSequences = _utilityObj._xmlUtil.GetTextValues(nodeName,
                                                                            Constants.ExpectedSequencesNode);

            IList <IPairwiseSequenceAlignment> expectedOutput = new List <IPairwiseSequenceAlignment>();

            IPairwiseSequenceAlignment seqAlign   = new PairwiseSequenceAlignment();
            PairwiseAlignedSequence    alignedSeq = new PairwiseAlignedSequence();

            alignedSeq.FirstSequence  = new Sequence(referenceSeq.Alphabet, expectedSequences[0]);
            alignedSeq.SecondSequence = new Sequence(referenceSeq.Alphabet, expectedSequences[1]);
            alignedSeq.Score          = Convert.ToInt32(expectedScore, (IFormatProvider)null);
            seqAlign.PairwiseAlignedSequences.Add(alignedSeq);
            expectedOutput.Add(seqAlign);
            Assert.IsTrue(CompareAlignment(align, expectedOutput));
            Console.WriteLine("MUMmer BVT : Successfully validated the aligned sequences.");
            ApplicationLog.WriteLine("MUMmer BVT : Successfully validated the aligned sequences.");
        }
        public void ValidatePairwiseAlignedSequenceMultipleRefQuery()
        {
            var referenceSeqs = new List <ISequence>()
            {
                new Sequence(Alphabets.DNA, "ATGCGCATCCCC")
                {
                    ID = "R1"
                },
                new Sequence(Alphabets.DNA, "TAGCT")
                {
                    ID = "R2"
                }
            };

            var searchSeqs = new List <ISequence>()
            {
                new Sequence(Alphabets.DNA, "CCGCGCCCCCTC")
                {
                    ID = "Q1"
                },
                new Sequence(Alphabets.DNA, "AGCT")
                {
                    ID = "Q2"
                }
            };

            var nucmer = new NucmerPairwiseAligner
            {
                FixedSeparation  = 0,
                MinimumScore     = 2,
                SeparationFactor = -1,
                LengthOfMUM      = 3,
                ForwardOnly      = true,
            };

            IList <IPairwiseSequenceAlignment> result = nucmer.Align(referenceSeqs, searchSeqs).Select(a => a as IPairwiseSequenceAlignment).ToList();

            // Check if output is not null
            Assert.AreNotEqual(null, result);

            IList <IPairwiseSequenceAlignment> expectedOutput = new List <IPairwiseSequenceAlignment>();
            IPairwiseSequenceAlignment         align          = new PairwiseSequenceAlignment();
            var alignedSeq = new PairwiseAlignedSequence
            {
                FirstSequence  = new Sequence(Alphabets.DNA, "GCGCATCCCC"),
                SecondSequence = new Sequence(Alphabets.DNA, "GCGC--CCCC"),
                Consensus      = new Sequence(Alphabets.DNA, "GCGCATCCCC"),
                Score          = -5,
                FirstOffset    = 0,
                SecondOffset   = 0
            };

            align.PairwiseAlignedSequences.Add(alignedSeq);
            expectedOutput.Add(align);

            align      = new PairwiseSequenceAlignment();
            alignedSeq = new PairwiseAlignedSequence
            {
                FirstSequence  = new Sequence(Alphabets.DNA, "AGCT"),
                SecondSequence = new Sequence(Alphabets.DNA, "AGCT"),
                Consensus      = new Sequence(Alphabets.DNA, "AGCT"),
                Score          = 12,
                FirstOffset    = 0,
                SecondOffset   = 1
            };
            align.PairwiseAlignedSequences.Add(alignedSeq);
            expectedOutput.Add(align);

            Assert.IsTrue(AlignmentHelpers.CompareAlignment(result, expectedOutput));
            ApplicationLog.WriteLine("PairwiseAlignedSequence P1: Successfully validated Sequence with Multiple Reference.");
        }