예제 #1
0
        public void SmithWatermanProteinSeqAffineGap()
        {
            IPairwiseSequenceAligner sw = new SmithWatermanAligner
                {
                    SimilarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.Blosum62),
                    GapOpenCost = -8,
                    GapExtensionCost = -1,
                };

            ISequence sequence1 = new Sequence(Alphabets.Protein, "HEAGAWGHEE");
            ISequence sequence2 = new Sequence(Alphabets.Protein, "PAWHEAE");
            IList<IPairwiseSequenceAlignment> result = sw.Align(sequence1, sequence2);
            AlignmentHelpers.LogResult(sw, result);

            IList<IPairwiseSequenceAlignment> expectedOutput = new List<IPairwiseSequenceAlignment>();
            IPairwiseSequenceAlignment align = new PairwiseSequenceAlignment();
            PairwiseAlignedSequence alignedSeq = new PairwiseAlignedSequence
                {
                    FirstSequence = new Sequence(Alphabets.Protein, "AWGHE"),
                    SecondSequence = new Sequence(Alphabets.Protein, "AW-HE"),
                    Consensus = new Sequence(Alphabets.AmbiguousProtein, "AWGHE"),
                    Score = 20,
                    FirstOffset = 0,
                    SecondOffset = 3
                };
            align.PairwiseAlignedSequences.Add(alignedSeq);
            expectedOutput.Add(align);

            Assert.IsTrue(AlignmentHelpers.CompareAlignment(result, expectedOutput));
        }
예제 #2
0
        /// <summary>
        /// Initializes static members of the SequenceAligners class.
        /// Static constructor
        /// </summary>
        static SequenceAligners()
        {
            NUCmer = new NucmerPairwiseAligner();
            MUMmer = new MUMmerAligner();
            NeedlemanWunsch = new NeedlemanWunschAligner();
            SmithWaterman = new SmithWatermanAligner();

            var knownAligners = new List<ISequenceAligner> { SmithWaterman, NeedlemanWunsch, MUMmer, NUCmer };

            // Get the registered aligners
            IEnumerable<ISequenceAligner> registeredAligners = GetAligners();
            if (null != registeredAligners)
            {
                knownAligners.AddRange(registeredAligners
                    .Where(aligner => aligner != null 
                        && knownAligners.All(sa => string.Compare(sa.Name, aligner.Name, StringComparison.OrdinalIgnoreCase) != 0)));
            }

            All = knownAligners;
        }
예제 #3
0
        /// <summary>
        /// Initializes static members of the SequenceAligners class.
        /// Static constructor
        /// </summary>
        static SequenceAligners()
        {
            NUCmer          = new NucmerPairwiseAligner();
            MUMmer          = new MUMmerAligner();
            NeedlemanWunsch = new NeedlemanWunschAligner();
            SmithWaterman   = new SmithWatermanAligner();

            var knownAligners = new List <ISequenceAligner> {
                SmithWaterman, NeedlemanWunsch, MUMmer, NUCmer
            };

            // Get the registered aligners
            IEnumerable <ISequenceAligner> registeredAligners = GetAligners();

            if (null != registeredAligners)
            {
                knownAligners.AddRange(registeredAligners
                                       .Where(aligner => aligner != null &&
                                              knownAligners.All(sa => string.Compare(sa.Name, aligner.Name, StringComparison.OrdinalIgnoreCase) != 0)));
            }

            All = knownAligners;
        }
예제 #4
0
        public static List <ContinuousFrequencyIndelGenotype> CallIndelsFromPathCollection(DeBruijnPathList paths, DeBruijnGraph graph)
        {
            var sequences = paths.Paths.Select(z => new IndelData(z, graph.KmerLength)).Where(z => z.OkayData).ToList();

            if (sequences.Count == 0)
            {
                return(new List <ContinuousFrequencyIndelGenotype>());
            }
            // Get the reference sequence.
            var regionStart = sequences.Min(x => x.LikelyStart) - IndelPathCollection.AlignmentPadding - graph.KmerLength;
            var regionEnd   = sequences.Max(x => x.LikelyEnd) + IndelPathCollection.AlignmentPadding + graph.KmerLength;
            var reference   = HaploGrepSharp.ReferenceGenome.GetReferenceSequenceSection(regionStart, regionEnd);

            // Setup the aligner with appropriate parameters.
            var algo = new Bio.Algorithms.Alignment.SmithWatermanAligner();

            algo.SimilarityMatrix = new Bio.SimilarityMatrices.DiagonalSimilarityMatrix(1, -1);
            algo.GapOpenCost      = -2;
            algo.GapExtensionCost = -1;

            // Execute the alignment and go through and generate variants.
            Dictionary <IndelData, List <IndelLocation> > indels = new Dictionary <IndelData, List <IndelLocation> >();

            foreach (var s in sequences)
            {
                //Note, do not change alignment order here.
                var aln         = algo.Align(reference.Seq, s.Seq);
                var res         = aln[0].PairwiseAlignedSequences[0];
                var indels_locs = FindIndels(res);
                indels[s] = indels_locs;
            }
            // Now to group indels by unique starts and collect them.
            var locations = indels.Values.SelectMany(z => z).ToList().Distinct().GroupBy(z => z.Start);
            var toReturn  = new List <ContinuousFrequencyIndelGenotype>(10);

            // Note: Typically there will only be one Indel location per alignment
            foreach (var g in locations)
            {
                var g2     = g.ToList();
                var lspots = g2.Select(x => x.DeletionOnReference).Distinct().Count();
                if (lspots > 1)
                {
                    throw new NotImplementedException("Same location had indels present on both reference and reads.  This is an edge case not handled");
                }
                // Add a fake reference allele.
                var first    = g2[0];
                var no_indel = new IndelLocation(first.DeletionOnReference, first.Start, 0);
                no_indel.InsertedSequence = String.Empty;
                g2.Add(no_indel);
                //sort by location
                g2.Sort();
                double[] counts = new double[g2.Count];
                //now add counts from each.
                foreach (var s in sequences)
                {
                    var cur   = indels[s];
                    int index = 0;
                    var atLoc = cur.Where(x => x.Start == first.Start).FirstOrDefault();
                    if (atLoc == null)
                    {
                        counts[index] += s.MinKmerCount;
                    }
                    else
                    {
                        bool found = false;
                        for (int i = 1; i < g2.Count; i++)
                        {
                            if (g2[i].Equals(atLoc))
                            {
                                counts[i] += s.MinKmerCount;
                                found      = true;
                                break;
                            }
                        }
                        if (!found)
                        {
                            throw new InvalidProgramException("Point should never be reached");
                        }
                    }
                }
                var types      = g2.Select(x => x.InsertedSequence).ToList();
                var ref_loc    = HaploGrepSharp.ReferenceGenome.ConvertTorCRSPosition(first.Start + regionStart);
                var indel_call = new ContinuousFrequencyIndelGenotype(first.DeletionOnReference, types, counts, ref_loc);
                toReturn.Add(indel_call);
            }
            return(toReturn);
        }
예제 #5
0
        public void SmithWatermanAlignerMultipleAlignments1()
        {
            IPairwiseSequenceAligner sw = new SmithWatermanAligner
            {
                SimilarityMatrix = new DiagonalSimilarityMatrix(5, -20), 
                GapOpenCost = -5
            };

            ISequence sequence1 = new Sequence(Alphabets.DNA, "AAATTCCCAG");
            ISequence sequence2 = new Sequence(Alphabets.DNA, "AAAGCCC");
            IList<IPairwiseSequenceAlignment> result = sw.AlignSimple(sequence1, sequence2);
            AlignmentHelpers.LogResult(sw, result);

            IList<IPairwiseSequenceAlignment> expectedOutput = new List<IPairwiseSequenceAlignment>();
            IPairwiseSequenceAlignment align = new PairwiseSequenceAlignment(sequence1, sequence2);

            // First alignment
            align.PairwiseAlignedSequences.Add(new PairwiseAlignedSequence
            {
                FirstSequence = new Sequence(Alphabets.DNA, "AAA"),
                SecondSequence = new Sequence(Alphabets.DNA, "AAA"),
                Consensus = new Sequence(Alphabets.DNA, "AAA"),
                Score = 15,
                FirstOffset = 0,
                SecondOffset = 0
            });

            // Second alignment
            align.PairwiseAlignedSequences.Add(new PairwiseAlignedSequence
            {
                FirstSequence = new Sequence(Alphabets.DNA, "CCC"),
                SecondSequence = new Sequence(Alphabets.DNA, "CCC"),
                Consensus = new Sequence(Alphabets.DNA, "CCC"),
                Score = 15,
                FirstOffset = 0,
                SecondOffset = 1
            });

            expectedOutput.Add(align);
            Assert.IsTrue(AlignmentHelpers.CompareAlignment(result, expectedOutput));
        }
예제 #6
0
        private void InValidateSmithWatermanAlignmentWithInvalidSimilarityMatrix(string nodeName,
                                                                                 bool isTextFile,
                                                                                 SimilarityMatrixInvalidTypes
                                                                                     invalidType,
                                                                                 AlignParameters additionalParameter,
                                                                                 AlignmentType alignType)
        {
            Sequence aInput = null;
            Sequence bInput = null;
            ISequence inputSequence1;
            ISequence inputSequence2;

            IAlphabet alphabet = Utility.GetAlphabet(this.utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                                     Constants.AlphabetNameNode));
            if (isTextFile)
            {
                // Read the xml file for getting both the files for aligning.
                string firstInputFilepath = this.utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                            Constants.FilePathNode1);
                string secondInputFilepath = this.utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                             Constants.FilePathNode2);

                // Parse the files and get the sequence.
                var parseObjectForFile1 = new FastAParser { Alphabet = alphabet };
                inputSequence1 = parseObjectForFile1.Parse(firstInputFilepath).ElementAt(0);
                inputSequence2 = parseObjectForFile1.Parse(secondInputFilepath).ElementAt(0);

                // Create input sequence for sequence string in different cases.
                GetSequenceWithCaseType(new string(inputSequence1.Select(a => (char) a).ToArray()),
                                        new string(inputSequence2.Select(a => (char) a).ToArray()),
                                        alphabet, SequenceCaseType.LowerCase, out aInput, out bInput);
            }
            else
            {
                string firstInputSequence = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceNode1);
                string secondInputSequence = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceNode2);

                // Create input sequence for sequence string in different cases.
                GetSequenceWithCaseType(firstInputSequence, secondInputSequence,
                                        alphabet, SequenceCaseType.LowerCase, out aInput, out bInput);
            }

            ApplicationLog.WriteLine(string.Concat(
                "SmithWatermanAligner P2 : First sequence used is '{0}'.",
                new string(aInput.Select(a => (char) a).ToArray())));
            ApplicationLog.WriteLine(string.Concat(
                "SmithWatermanAligner P2 : Second sequence used is '{0}'.",
                new string(bInput.Select(a => (char) a).ToArray())));

            // Create similarity matrix object for a invalid file.
            string blosumFilePath = this.GetSimilarityMatrixFileWithInvalidType(nodeName, invalidType);
            Exception actualExpection = null;

            // For invalid similarity matrix data format; exception will be thrown while instantiating
            SimilarityMatrix sm = null;
            try
            {
                if (invalidType != SimilarityMatrixInvalidTypes.NullSimilarityMatrix)
                {
                    sm = new SimilarityMatrix(new StreamReader(blosumFilePath));
                }
            }
            catch (InvalidDataException ex)
            {
                actualExpection = ex;
            }

            // For non matching similarity matrix exception will be thrown while alignment
            if (actualExpection == null)
            {
                int gapOpenCost = int.Parse(this.utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                            Constants.GapOpenCostNode), null);

                int gapExtensionCost = int.Parse(this.utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                                 Constants.GapExtensionCostNode), null);

                // Create SmithWatermanAligner instance and set its values.
                var smithWatermanObj = new SmithWatermanAligner();
                if (additionalParameter != AlignParameters.AllParam)
                {
                    smithWatermanObj.SimilarityMatrix = sm;
                    smithWatermanObj.GapOpenCost = gapOpenCost;
                    smithWatermanObj.GapExtensionCost = gapExtensionCost;
                }

                // Align the input sequences and catch the exception.
                switch (additionalParameter)
                {
                    case AlignParameters.AlignList:
                        switch (alignType)
                        {
                            case AlignmentType.Align:
                                try
                                {
                                    smithWatermanObj.Align(new List<ISequence> {aInput, bInput});
                                }
                                catch (ArgumentException ex)
                                {
                                    actualExpection = ex;
                                }
                                break;
                            default:
                                try
                                {
                                    smithWatermanObj.AlignSimple(new List<ISequence> {aInput, bInput});
                                }
                                catch (ArgumentException ex)
                                {
                                    actualExpection = ex;
                                }
                                break;
                        }
                        break;
                    case AlignParameters.AlignTwo:
                        switch (alignType)
                        {
                            case AlignmentType.Align:
                                try
                                {
                                    smithWatermanObj.Align(aInput, bInput);
                                }
                                catch (ArgumentException ex)
                                {
                                    actualExpection = ex;
                                }
                                break;
                            default:
                                try
                                {
                                    smithWatermanObj.AlignSimple(aInput, bInput);
                                }
                                catch (ArgumentException ex)
                                {
                                    actualExpection = ex;
                                }
                                break;
                        }
                        break;
                    case AlignParameters.AllParam:
                        switch (alignType)
                        {
                            case AlignmentType.Align:
                                try
                                {
                                    smithWatermanObj.Align(sm, gapOpenCost, gapExtensionCost,
                                                           aInput, bInput);
                                }
                                catch (ArgumentException ex)
                                {
                                    actualExpection = ex;
                                }
                                break;
                            default:
                                try
                                {
                                    smithWatermanObj.AlignSimple(sm, gapOpenCost, aInput, bInput);
                                }
                                catch (ArgumentException ex)
                                {
                                    actualExpection = ex;
                                }
                                break;
                        }
                        break;
                    default:
                        break;
                }
            }

            // Validate that expected exception is thrown using error message.
            string expectedErrorMessage = this.GetExpectedErrorMeesageWithInvalidSimilarityMatrixType(nodeName,
                                                                                                 invalidType);
            Assert.AreEqual(expectedErrorMessage, actualExpection.Message);

            ApplicationLog.WriteLine(string.Concat(
                "SmithWatermanAligner P2 : Expected Error message is thrown ", expectedErrorMessage));
        }
예제 #7
0
        private void InValidateSmithWatermanAlignmentWithInvalidSequence(string nodeName,
                                                                         bool isTextFile,
                                                                         InvalidSequenceType invalidSequenceType,
                                                                         AlignParameters additionalParameter,
                                                                         AlignmentType alignType,
                                                                         InvalidSequenceType sequenceType)
        {
            IAlphabet alphabet = Utility.GetAlphabet(this.utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                                     Constants.AlphabetNameNode));

            Exception actualException = null;
            Sequence aInput = null;
            Sequence bInput = null;

            if (isTextFile)
            {
                // Read the xml file for getting both the files for aligning.
                string filepath = this.GetInputFileNameWithInvalidType(nodeName, invalidSequenceType);

                // Create input sequence for sequence string in different cases.
                try
                {
                    // Parse the files and get the sequence.
                    IEnumerable<ISequence> seqs = null;
                    var parser = new FastAParser();
                    seqs = parser.Parse(filepath);
                    aInput = new Sequence(alphabet, new string(seqs.ElementAt(0).Select(a => (char) a).ToArray()));
                }
                catch (Exception ex)
                {
                    actualException = ex;
                }
            }
            else
            {
                string originalSequence = this.utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                          Constants.InvalidSequence1);

                // Create input sequence for sequence string in different cases.
                try
                {
                    aInput = new Sequence(alphabet, originalSequence);
                }
                catch (ArgumentException ex)
                {
                    actualException = ex;
                }
            }

            if (actualException == null)
            {
                bInput = aInput;

                // Create similarity matrix object for a given file.
                string blosumFilePath = this.utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                        Constants.BlosumFilePathNode);

                var sm = new SimilarityMatrix(new StreamReader(blosumFilePath));

                int gapOpenCost = int.Parse(this.utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                            Constants.GapOpenCostNode), null);

                int gapExtensionCost = int.Parse(this.utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                                 Constants.GapExtensionCostNode), null);

                // Create SmithWatermanAligner instance and set its values.
                var smithWatermanObj = new SmithWatermanAligner();
                if (additionalParameter != AlignParameters.AllParam)
                {
                    smithWatermanObj.SimilarityMatrix = sm;
                    smithWatermanObj.GapOpenCost = gapOpenCost;
                    smithWatermanObj.GapExtensionCost = gapExtensionCost;
                }

                // Align the input sequences and catch the exception.
                switch (additionalParameter)
                {
                    case AlignParameters.AlignList:
                        switch (alignType)
                        {
                            case AlignmentType.Align:
                                try
                                {
                                    smithWatermanObj.Align(new List<ISequence> {aInput, bInput});
                                }
                                catch (ArgumentException ex)
                                {
                                    actualException = ex;
                                }
                                break;
                            default:
                                try
                                {
                                    smithWatermanObj.AlignSimple(new List<ISequence> {aInput, bInput});
                                }
                                catch (ArgumentException ex)
                                {
                                    actualException = ex;
                                }
                                break;
                        }
                        break;
                    case AlignParameters.AlignTwo:
                        switch (alignType)
                        {
                            case AlignmentType.Align:
                                try
                                {
                                    smithWatermanObj.Align(aInput, bInput);
                                }
                                catch (ArgumentException ex)
                                {
                                    actualException = ex;
                                }
                                break;
                            default:
                                try
                                {
                                    smithWatermanObj.AlignSimple(aInput, bInput);
                                }
                                catch (ArgumentException ex)
                                {
                                    actualException = ex;
                                }
                                break;
                        }
                        break;
                    case AlignParameters.AllParam:
                        switch (alignType)
                        {
                            case AlignmentType.Align:
                                try
                                {
                                    smithWatermanObj.Align(sm, gapOpenCost,
                                                           gapExtensionCost, aInput, bInput);
                                }
                                catch (ArgumentException ex)
                                {
                                    actualException = ex;
                                }
                                break;
                            default:
                                try
                                {
                                    smithWatermanObj.AlignSimple(sm, gapOpenCost,
                                                                 aInput, bInput);
                                }
                                catch (ArgumentException ex)
                                {
                                    actualException = ex;
                                }
                                break;
                        }
                        break;
                    default:
                        break;
                }
            }

            // Validate Error messages for Invalid Sequence types.
            string expectedErrorMessage = this.GetExpectedErrorMeesageWithInvalidSequenceType(nodeName,
                                                                                         sequenceType);

            Assert.AreEqual(expectedErrorMessage, actualException.Message);

            ApplicationLog.WriteLine(string.Concat(
                "SmithWatermanAligner P2 : Expected Error message is thrown ", expectedErrorMessage));
        }
예제 #8
0
        public void SmithWatermanGotohAlignSequenceWithMultiplePossibleIndelPositions()
        {
            var longRef = "TGACCCCGAGGGGGCCCGGGGCCGCGTCCCTGGGCCCTCCCCA";
            var longHapOrg = "TGACCCCGAGGG---CCGGG--------------CCCTCCCCA";
            var longHap = longHapOrg.Replace("-", "");
            var lr = new Sequence(DnaAlphabet.Instance, longRef);
            var lh = new Sequence(DnaAlphabet.Instance, longHap);
            
            var al = new SmithWatermanAligner
                     {
                         SimilarityMatrix = new DiagonalSimilarityMatrix(20, -15),
                         GapOpenCost = -26,
                         GapExtensionCost = -1
                     };

            var res = al.Align(lr, lh);
            var results = res.First();
            var alignedSeq = results.AlignedSequences[0].Sequences[1].ToString();
            Assert.AreEqual(alignedSeq, longHapOrg);
        }
예제 #9
0
        private void ValidateSmithWatermanAlignment(string nodeName, bool isTextFile,
                                                    SequenceCaseType caseType, AlignParameters additionalParameter,
                                                    AlignmentType alignType,
                                                    SimilarityMatrixParameters similarityMatrixParam)
        {
            Sequence aInput, bInput;
            IAlphabet alphabet =
                Utility.GetAlphabet(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode));

            if (isTextFile)
            {
                // Read the xml file for getting both the files for aligning.
                string filePath1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode1);
                string filePath2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode2);

                // Parse the files and get the sequence.
                ISequence originalSequence1 = null;
                ISequence originalSequence2 = null;

                var parseObjectForFile1 = new FastAParser { Alphabet = alphabet };
                originalSequence1 = parseObjectForFile1.Parse(filePath1).ElementAt(0);
                originalSequence2 = parseObjectForFile1.Parse(filePath2).ElementAt(0);

                // Create input sequence for sequence string in different cases.             
                GetSequenceWithCaseType(originalSequence1.ConvertToString(), originalSequence2.ConvertToString(),
                                        alphabet, caseType, out aInput, out bInput);
            }
            else
            {
                string originalSequence1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceNode1);
                string originalSequence2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceNode2);

                // Create input sequence for sequence string in different cases.
                GetSequenceWithCaseType(
                    originalSequence1,
                    originalSequence2,
                    alphabet,
                    caseType,
                    out aInput,
                    out bInput);
            }

            ApplicationLog.WriteLine(string.Format("SmithWatermanAligner P2 : First sequence used is '{0}'.",
                                                   aInput.ConvertToString()));
            ApplicationLog.WriteLine(string.Format("SmithWatermanAligner P2 : Second sequence used is '{0}'.",
                                                   bInput.ConvertToString()));

            // Create similarity matrix object for a given file.
            string blosumFilePath = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.BlosumFilePathNode);
            SimilarityMatrix sm;
            switch (similarityMatrixParam)
            {
                case SimilarityMatrixParameters.TextReader:
                    using (TextReader reader = new StreamReader(blosumFilePath))
                        sm = new SimilarityMatrix(reader);
                    break;
                case SimilarityMatrixParameters.DiagonalMatrix:
                    string matchValue = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MatchScoreNode);
                    string misMatchValue = this.utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                           Constants.MisMatchScoreNode);
                    sm = new DiagonalSimilarityMatrix(int.Parse(matchValue, null),
                                                      int.Parse(misMatchValue, null));
                    break;
                default:
                    sm = new SimilarityMatrix(new StreamReader(blosumFilePath));
                    break;
            }

            int gapOpenCost = int.Parse(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.GapOpenCostNode), null);
            int gapExtensionCost = int.Parse(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.GapExtensionCostNode),
                                             null);

            // Create SmithWatermanAligner instance and set its values.
            var smithWatermanObj = new SmithWatermanAligner();
            if (additionalParameter != AlignParameters.AllParam)
            {
                smithWatermanObj.SimilarityMatrix = sm;
                smithWatermanObj.GapOpenCost = gapOpenCost;
                smithWatermanObj.GapExtensionCost = gapExtensionCost;
            }
            IList<IPairwiseSequenceAlignment> result = null;

            // Align the input sequences.
            switch (additionalParameter)
            {
                case AlignParameters.AlignList:
                    switch (alignType)
                    {
                        case AlignmentType.Align:
                            result = smithWatermanObj.Align(new List<ISequence> {aInput, bInput});
                            break;
                        default:
                            result = smithWatermanObj.AlignSimple(new List<ISequence> {aInput, bInput});
                            break;
                    }
                    break;
                case AlignParameters.AlignTwo:
                    switch (alignType)
                    {
                        case AlignmentType.Align:
                            result = smithWatermanObj.Align(aInput, bInput);
                            break;
                        default:
                            result = smithWatermanObj.AlignSimple(aInput, bInput);
                            break;
                    }
                    break;
                case AlignParameters.AllParam:
                    switch (alignType)
                    {
                        case AlignmentType.Align:
                            result = smithWatermanObj.Align(sm, gapOpenCost,
                                                            gapExtensionCost, aInput, bInput);
                            break;
                        default:
                            result = smithWatermanObj.AlignSimple(sm, gapOpenCost, aInput, bInput);
                            break;
                    }
                    break;
                default:
                    break;
            }

            // Get the expected sequence and scorde from xml config.
            string expectedSequence1, expectedSequence2, expectedScore;

            switch (alignType)
            {
                case AlignmentType.Align:
                    expectedScore = this.utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                    Constants.ExpectedGapExtensionScoreNode);

                    switch (caseType)
                    {
                        case SequenceCaseType.LowerCase:
                            expectedSequence1 = this.utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                                Constants
                                                                                    .ExpectedGapExtensionSequence1InLower);
                            expectedSequence2 = this.utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                                Constants
                                                                                    .ExpectedGapExtensionSequence2InLower);
                            break;
                        default:
                            expectedSequence1 = this.utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                                Constants
                                                                                    .ExpectedGapExtensionSequence1Node);
                            expectedSequence2 = this.utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                                Constants
                                                                                    .ExpectedGapExtensionSequence2Node);
                            break;
                    }
                    break;
                default:
                    expectedScore = this.utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                    Constants.ExpectedScoreNode);

                    switch (caseType)
                    {
                        case SequenceCaseType.LowerCase:
                            expectedSequence1 = this.utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                                Constants.ExpectedSequence1inLowerNode);
                            expectedSequence2 = this.utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                                Constants.ExpectedSequence2inLowerNode);
                            break;
                        case SequenceCaseType.LowerUpperCase:
                            expectedSequence1 = this.utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                                Constants.ExpectedSequence1inLowerNode);
                            expectedSequence2 = this.utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                                Constants.ExpectedSequenceNode2);
                            break;
                        default:
                            expectedSequence1 = this.utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                                Constants.ExpectedSequenceNode1);
                            expectedSequence2 = this.utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                                Constants.ExpectedSequenceNode2);
                            break;
                    }

                    break;
            }

            // Match the alignment result with expected result.
            IList<IPairwiseSequenceAlignment> expectedOutput = new List<IPairwiseSequenceAlignment>();

            IPairwiseSequenceAlignment align = new PairwiseSequenceAlignment();
            var alignedSeq = new PairwiseAlignedSequence
                                 {
                                     FirstSequence = new Sequence(alphabet, expectedSequence1),
                                     SecondSequence = new Sequence(alphabet, expectedSequence2),
                                     Score = Convert.ToInt32(expectedScore, null),
                                     FirstOffset = Int32.MinValue,
                                     SecondOffset = Int32.MinValue,
                                 };
            align.PairwiseAlignedSequences.Add(alignedSeq);
            expectedOutput.Add(align);

            ApplicationLog.WriteLine(string.Format(null, "SmithWatermanAligner P2 : Final Score '{0}'.", expectedScore));
            ApplicationLog.WriteLine(string.Format(null, "SmithWatermanAligner P2 : Aligned First Sequence is '{0}'.",
                                                   expectedSequence1));
            ApplicationLog.WriteLine(string.Format(null, "SmithWatermanAligner P2 : Aligned Second Sequence is '{0}'.",
                                                   expectedSequence2));

            Assert.IsTrue(CompareAlignment(result, expectedOutput));
        }
예제 #10
0
 public void SmithWatermanThrowsExceptionWhenTooLarge()
 {
     // What size squared is too large?
     int seq_size = (int)Math.Sqrt ((double)Int32.MaxValue) + 5;
     byte[] seq = new byte[seq_size];
     // Now let's generate sequences of those size
     for (int i = 0; i < seq.Length; i++) {
         seq [i] = (byte)'A';
     }
     var seq1 = new Sequence (DnaAlphabet.Instance, seq, false);
     var na = new SmithWatermanAligner ();
     Assert.Throws(typeof(ArgumentOutOfRangeException),
         () => na.Align(seq1, seq1));
 }
예제 #11
0
        private void ValidateSmithWatermanAlignment(string nodeName, AlignParameters alignParam,
                                                    SimilarityMatrixParameters similarityMatrixParam,
                                                    AlignmentType alignType)
        {
            ISequence aInput, bInput;
            IAlphabet alphabet =
                Utility.GetAlphabet(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode));

            // Parse the files and get the sequence.
            if (alignParam.ToString().Contains("Code"))
            {
                string sequence1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceNode1);
                string sequence2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceNode2);

                aInput = new Sequence(alphabet, sequence1);
                bInput = new Sequence(alphabet, sequence2);
            }
            else
            {
                // Read the xml file for getting both the files for aligning.
                string filePath1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode1);
                string filePath2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode2);

                var parseObjectForFile1 = new FastAParser { Alphabet = alphabet };
                ISequence originalSequence1 = parseObjectForFile1.Parse(filePath1).FirstOrDefault();
                Assert.IsNotNull(originalSequence1);
                aInput = new Sequence(alphabet, originalSequence1.ConvertToString());

                var parseObjectForFile2 = new FastAParser { Alphabet = alphabet };
                ISequence originalSequence2 = parseObjectForFile2.Parse(filePath2).FirstOrDefault();
                Assert.IsNotNull(originalSequence2);
                bInput = new Sequence(alphabet, originalSequence2.ConvertToString());
            }

            string blosumFilePath = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.BlosumFilePathNode);
            SimilarityMatrix sm;

            switch (similarityMatrixParam)
            {
                case SimilarityMatrixParameters.TextReader:
                    using (TextReader reader = new StreamReader(blosumFilePath))
                        sm = new SimilarityMatrix(reader);
                    break;
                case SimilarityMatrixParameters.DiagonalMatrix:
                    string matchValue = this.utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                        Constants.MatchScoreNode);
                    string misMatchValue = this.utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                           Constants.MisMatchScoreNode);
                    sm = new DiagonalSimilarityMatrix(int.Parse(matchValue, null),
                                                      int.Parse(misMatchValue, null));
                    break;
                default:
                    sm = new SimilarityMatrix(new StreamReader(blosumFilePath));
                    break;
            }

            int gapOpenCost = int.Parse(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.GapOpenCostNode), null);
            int gapExtensionCost = int.Parse(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.GapExtensionCostNode),
                                             null);

            var smithWatermanObj = new SmithWatermanAligner();

            if (AlignParameters.AllParam != alignParam)
            {
                smithWatermanObj.SimilarityMatrix = sm;
                smithWatermanObj.GapOpenCost = gapOpenCost;
            }

            IList<IPairwiseSequenceAlignment> result = null;

            switch (alignParam)
            {
                case AlignParameters.AlignList:
                case AlignParameters.AlignListCode:
                    var sequences = new List<ISequence> {aInput, bInput};
                    switch (alignType)
                    {
                        case AlignmentType.Align:
                            smithWatermanObj.GapExtensionCost = gapExtensionCost;
                            result = smithWatermanObj.Align(sequences);
                            break;
                        default:
                            result = smithWatermanObj.AlignSimple(sequences);
                            break;
                    }
                    break;
                case AlignParameters.AllParam:
                case AlignParameters.AllParamCode:
                    switch (alignType)
                    {
                        case AlignmentType.Align:
                            smithWatermanObj.GapExtensionCost = gapExtensionCost;
                            result = smithWatermanObj.Align(sm, gapOpenCost, gapExtensionCost, aInput, bInput);
                            break;
                        default:
                            result = smithWatermanObj.AlignSimple(sm, gapOpenCost, aInput, bInput);
                            break;
                    }
                    break;
                case AlignParameters.AlignTwo:
                case AlignParameters.AlignTwoCode:
                    switch (alignType)
                    {
                        case AlignmentType.Align:
                            smithWatermanObj.GapExtensionCost = gapExtensionCost;
                            result = smithWatermanObj.Align(aInput, bInput);
                            break;
                        default:
                            result = smithWatermanObj.AlignSimple(aInput, bInput);
                            break;
                    }
                    break;
                default:
                    break;
            }
            // Read the xml file for getting both the files for aligning.
            string expectedSequence1, expectedSequence2, expectedScore;

            switch (alignType)
            {
                case AlignmentType.Align:
                    expectedScore = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedGapExtensionScoreNode);
                    expectedSequence1 = this.utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                        Constants.ExpectedGapExtensionSequence1Node);
                    expectedSequence2 = this.utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                        Constants.ExpectedGapExtensionSequence2Node);
                    break;
                default:
                    expectedScore = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedScoreNode);
                    expectedSequence1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequenceNode1);
                    expectedSequence2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequenceNode2);
                    break;
            }

            IList<IPairwiseSequenceAlignment> expectedOutput = new List<IPairwiseSequenceAlignment>();

            IPairwiseSequenceAlignment align = new PairwiseSequenceAlignment();
            var alignedSeq = new PairwiseAlignedSequence
                                 {
                                     FirstSequence = new Sequence(alphabet, expectedSequence1),
                                     SecondSequence = new Sequence(alphabet, expectedSequence2),
                                     Score = Convert.ToInt32(expectedScore, null),
                                     FirstOffset = Int32.MinValue,
                                     SecondOffset = Int32.MinValue,
                                 };
            align.PairwiseAlignedSequences.Add(alignedSeq);
            expectedOutput.Add(align);

            ApplicationLog.WriteLine(string.Format("SmithWatermanAligner P1 : Final Score '{0}'.", expectedScore));
            ApplicationLog.WriteLine(string.Format("SmithWatermanAligner P1 : Aligned First Sequence is '{0}'.",
                                                   expectedSequence1));
            ApplicationLog.WriteLine(string.Format("SmithWatermanAligner P1 : Aligned Second Sequence is '{0}'.",
                                                   expectedSequence2));

            Assert.IsTrue(CompareAlignment(result, expectedOutput));
        }
예제 #12
0
        private void ValidateSmithWatermanAlignment(bool isTextFile, AlignmentParamType alignParam,
                                                    AlignmentType alignType)
        {
            ISequence aInput, bInput;
            IAlphabet alphabet =
                Utility.GetAlphabet(this.utilityObj.xmlUtil.GetTextValue(Constants.SmithWatermanAlignAlgorithmNodeName,
                                                                    Constants.AlphabetNameNode));

            if (isTextFile)
            {
                // Read the xml file for getting both the files for aligning.
                string filePath1 = this.utilityObj.xmlUtil.GetTextValue(Constants.SmithWatermanAlignAlgorithmNodeName,
                                                                   Constants.FilePathNode1);
                string filePath2 = this.utilityObj.xmlUtil.GetTextValue(Constants.SmithWatermanAlignAlgorithmNodeName,
                                                                   Constants.FilePathNode2);

                // Parse the files and get the sequence.
                var parseObjectForFile1 = new FastAParser();
                {
                    parseObjectForFile1.Alphabet = alphabet;
                    aInput = parseObjectForFile1.Parse(filePath1).First();
                }

                var parseObjectForFile2 = new FastAParser();
                {
                    parseObjectForFile2.Alphabet = alphabet;
                    bInput = parseObjectForFile2.Parse(filePath2).First();
                }
            }
            else
            {
                // Read the xml file for getting both the files for aligning.
                string origSequence1 = this.utilityObj.xmlUtil.GetTextValue(Constants.SmithWatermanAlignAlgorithmNodeName,
                                                                       Constants.SequenceNode1);
                string origSequence2 = this.utilityObj.xmlUtil.GetTextValue(Constants.SmithWatermanAlignAlgorithmNodeName,
                                                                       Constants.SequenceNode2);
                aInput = new Sequence(alphabet, origSequence1);
                bInput = new Sequence(alphabet, origSequence2);
            }

            string blosumFilePath = this.utilityObj.xmlUtil.GetTextValue(Constants.SmithWatermanAlignAlgorithmNodeName,
                                                                    Constants.BlosumFilePathNode);

            var sm = new SimilarityMatrix(new StreamReader(blosumFilePath));
            int gapOpenCost =
                int.Parse(
                    this.utilityObj.xmlUtil.GetTextValue(Constants.SmithWatermanAlignAlgorithmNodeName,
                                                    Constants.GapOpenCostNode), null);
            int gapExtensionCost =
                int.Parse(
                    this.utilityObj.xmlUtil.GetTextValue(Constants.SmithWatermanAlignAlgorithmNodeName,
                                                    Constants.GapExtensionCostNode), null);

            var smithWatermanObj = new SmithWatermanAligner();
            if (AlignmentParamType.AllParam != alignParam)
            {
                smithWatermanObj.SimilarityMatrix = sm;
                smithWatermanObj.GapOpenCost = gapOpenCost;
            }

            IList<IPairwiseSequenceAlignment> result = null;

            switch (alignParam)
            {
                case AlignmentParamType.AlignList:
                    switch (alignType)
                    {
                        case AlignmentType.Align:
                            result = smithWatermanObj.Align(new List<ISequence> {aInput, bInput});
                            break;
                        default:
                            result = smithWatermanObj.AlignSimple(new List<ISequence> {aInput, bInput});
                            break;
                    }
                    break;
                case AlignmentParamType.AlignTwo:
                    switch (alignType)
                    {
                        case AlignmentType.Align:
                            result = smithWatermanObj.Align(aInput, bInput);
                            break;
                        default:
                            result = smithWatermanObj.AlignSimple(aInput, bInput);
                            break;
                    }
                    break;
                case AlignmentParamType.AllParam:
                    switch (alignType)
                    {
                        case AlignmentType.Align:
                            result = smithWatermanObj.Align(sm, gapOpenCost,
                                                            gapExtensionCost, aInput, bInput);
                            break;
                        default:
                            result = smithWatermanObj.AlignSimple(sm, gapOpenCost, aInput, bInput);
                            break;
                    }
                    break;
                default:
                    break;
            }

            // Read the xml file for getting both the files for aligning.
            string expectedSequence1, expectedSequence2, expectedScore;

            switch (alignType)
            {
                case AlignmentType.Align:
                    expectedScore = this.utilityObj.xmlUtil.GetTextValue(
                        Constants.SmithWatermanAlignAlgorithmNodeName,
                        Constants.ExpectedGapExtensionScoreNode);
                    expectedSequence1 = this.utilityObj.xmlUtil.GetTextValue(
                        Constants.SmithWatermanAlignAlgorithmNodeName,
                        Constants.ExpectedGapExtensionSequence1Node);
                    expectedSequence2 = this.utilityObj.xmlUtil.GetTextValue(
                        Constants.SmithWatermanAlignAlgorithmNodeName,
                        Constants.ExpectedGapExtensionSequence2Node);
                    break;
                default:
                    expectedScore = this.utilityObj.xmlUtil.GetTextValue(
                        Constants.SmithWatermanAlignAlgorithmNodeName,
                        Constants.ExpectedScoreNode);
                    expectedSequence1 = this.utilityObj.xmlUtil.GetTextValue(
                        Constants.SmithWatermanAlignAlgorithmNodeName,
                        Constants.ExpectedSequenceNode1);
                    expectedSequence2 = this.utilityObj.xmlUtil.GetTextValue(
                        Constants.SmithWatermanAlignAlgorithmNodeName,
                        Constants.ExpectedSequenceNode2);
                    break;
            }

            IList<IPairwiseSequenceAlignment> expectedOutput = new List<IPairwiseSequenceAlignment>();

            IPairwiseSequenceAlignment align = new PairwiseSequenceAlignment();
            var alignedSeq = new PairwiseAlignedSequence
                                 {
                                     FirstSequence = new Sequence(alphabet, expectedSequence1),
                                     SecondSequence = new Sequence(alphabet, expectedSequence2),
                                     Score = Convert.ToInt32(expectedScore, null),
                                     FirstOffset = Int32.MinValue,
                                     SecondOffset = Int32.MinValue,
                                 };
            align.PairwiseAlignedSequences.Add(alignedSeq);
            expectedOutput.Add(align);

            ApplicationLog.WriteLine(string.Format(null, "SmithWatermanAligner BVT : Final Score '{0}'.", expectedScore));
            ApplicationLog.WriteLine(string.Format(null, "SmithWatermanAligner BVT : Aligned First Sequence is '{0}'.",
                                                   expectedSequence1));
            ApplicationLog.WriteLine(string.Format(null, "SmithWatermanAligner BVT : Aligned Second Sequence is '{0}'.",
                                                   expectedSequence2));

            Assert.IsTrue(CompareAlignment(result, expectedOutput));
        }