Exemplo n.º 1
0
        // Aligns two single-sequence DNA profiles with NeedlemanWunschProfileAligner and
        // verifies both the gapped output sequences and the multiple-alignment score.
        public void TestNeedlemanWunschProfileAligner()
        {
            // Register each template symbol, keyed to its position, as the profile item set.
            ISequence symbolTemplate = new Sequence(Alphabets.DNA, "ATGCSWRYKMBVHDN-");
            Dictionary <ISequenceItem, int> symbolToIndex = new Dictionary <ISequenceItem, int>();

            for (int position = 0; position < symbolTemplate.Count; ++position)
            {
                symbolToIndex.Add(symbolTemplate[position], position);
            }
            Profiles.ItemSet = symbolToIndex;

            // Configure the aligner with the ambiguous-DNA matrix and the gap open/extension costs.
            IProfileAligner  aligner = new NeedlemanWunschProfileAligner();
            SimilarityMatrix scoring = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrices.AmbiguousDna);
            int gapOpen   = -8;
            int gapExtend = -1;

            aligner.SimilarityMatrix = scoring;
            aligner.GapOpenCost      = gapOpen;
            aligner.GapExtensionCost = gapExtend;

            List <ISequence> inputs = new List <ISequence>
            {
                new Sequence(Alphabets.DNA, "GGGAAAAATCAGATT"),
                new Sequence(Alphabets.DNA, "GGGAATCAAAATCAG")
            };

            // Each input sequence becomes its own one-sequence profile.
            IProfileAlignment firstProfile  = ProfileAlignment.GenerateProfileAlignment(inputs[0]);
            IProfileAlignment secondProfile = ProfileAlignment.GenerateProfileAlignment(inputs[1]);

            aligner.Align(firstProfile, secondProfile);

            // Turn the aligner's AlignedA/AlignedB results into e-strings, then apply
            // those e-strings to the original sequences to obtain the gapped sequences.
            List <int> eStringA = aligner.GenerateEString(aligner.AlignedA);
            List <int> eStringB = aligner.GenerateEString(aligner.AlignedB);

            List <ISequence> aligned = new List <ISequence>
            {
                aligner.GenerateSequenceFromEString(eStringA, inputs[0]),
                aligner.GenerateSequenceFromEString(eStringB, inputs[1])
            };

            float score = MsaUtils.MultipleAlignmentScoreFunction(aligned, scoring, gapOpen, gapExtend);

            ISequence expectedA = new Sequence(Alphabets.DNA, "GGGAA---AAATCAGATT");
            ISequence expectedB = new Sequence(Alphabets.DNA, "GGGAATCAAAATCAG---");

            Assert.AreEqual(expectedA.ToString(), aligned[0].ToString());
            Assert.AreEqual(expectedB.ToString(), aligned[1].ToString());

            Assert.AreEqual(40, score);
        }
Exemplo n.º 2
0
        /// <summary>
        /// Merges two profile alignments that are already aligned to each other
        /// into one new profile alignment.
        /// </summary>
        /// <param name="profileAlignmentA">first profile alignment</param>
        /// <param name="profileAlignmentB">second profile alignment</param>
        /// <returns>
        /// A new profile alignment whose profiles come from Profiles.GenerateProfiles and
        /// whose sequence count is the sum of the two inputs' sequence counts.
        /// </returns>
        public static IProfileAlignment GenerateProfileAlignment(IProfileAlignment profileAlignmentA, IProfileAlignment profileAlignmentB)
        {
            // Build the combined profiles from both matrices and their sequence counts.
            IProfiles mergedProfiles = Profiles.GenerateProfiles(
                profileAlignmentA.ProfilesMatrix,
                profileAlignmentB.ProfilesMatrix,
                profileAlignmentA.NumberOfSequences,
                profileAlignmentB.NumberOfSequences);

            IProfileAlignment merged = new ProfileAlignment();

            merged.NumberOfSequences = profileAlignmentA.NumberOfSequences + profileAlignmentB.NumberOfSequences;
            merged.ProfilesMatrix = mergedProfiles;
            return merged;
        }
Exemplo n.º 3
0
        /// <summary>
        /// Produces one profile alignment out of two profile alignments that have
        /// already been aligned against each other.
        /// </summary>
        /// <param name="profileAlignmentA">first profile alignment</param>
        /// <param name="profileAlignmentB">second profile alignment</param>
        /// <returns>
        /// A new profile alignment carrying the profiles returned by Profiles.GenerateProfiles;
        /// its sequence count is the total of both inputs.
        /// </returns>
        public static IProfileAlignment GenerateProfileAlignment(IProfileAlignment profileAlignmentA, IProfileAlignment profileAlignmentB)
        {
            IProfiles combinedProfiles = Profiles.GenerateProfiles(
                profileAlignmentA.ProfilesMatrix,
                profileAlignmentB.ProfilesMatrix,
                profileAlignmentA.NumberOfSequences,
                profileAlignmentB.NumberOfSequences);

            IProfileAlignment combined = new ProfileAlignment();

            // The result represents every sequence from both inputs.
            combined.NumberOfSequences = profileAlignmentA.NumberOfSequences + profileAlignmentB.NumberOfSequences;
            combined.ProfilesMatrix    = combinedProfiles;

            return combined;
        }
Exemplo n.º 4
0
        // Exercises Profiles.GenerateProfiles and the single-sequence and sequence-list
        // overloads of ProfileAlignment.GenerateProfileAlignment on two short DNA sequences.
        public void TestProfile()
        {
            // Map every template symbol, and the lower-case form of each letter,
            // to the symbol's position in the template.
            ISequence symbolTemplate              = new Sequence(Alphabets.AmbiguousDNA, "ATGCSWRYKMBVHDN-");
            Dictionary <byte, int> symbolToColumn = new Dictionary <byte, int>();

            for (int column = 0; column < symbolTemplate.Count; ++column)
            {
                byte symbol     = symbolTemplate[column];
                char symbolChar = (char)symbol;

                symbolToColumn.Add(symbol, column);
                if (char.IsLetter(symbolChar))
                {
                    symbolToColumn.Add((byte)char.ToLower(symbolChar), column);
                }
            }
            Profiles.ItemSet = symbolToColumn;

            List <ISequence> inputs = new List <ISequence>
            {
                new Sequence(Alphabets.DNA, "GGGAAAAATCAGATT"),
                new Sequence(Alphabets.DNA, "GGGAATCAAAATCAG")
            };

            // Profiles built from a single sequence: 16 columns, one row per sequence position.
            IProfiles singleProfiles = Profiles.GenerateProfiles(inputs[0]);

            Assert.AreEqual(16, singleProfiles.ColumnSize);
            Assert.AreEqual(inputs[0].Count, singleProfiles.RowSize);

            // Spot-check two cells of the first profile row.
            Assert.AreEqual(1, singleProfiles.ProfilesMatrix[0][2]);
            Assert.AreEqual(0, singleProfiles.ProfilesMatrix[0][3]);

            // Profile alignment built from one sequence.
            IProfileAlignment singleAlignment = ProfileAlignment.GenerateProfileAlignment(inputs[0]);

            Assert.AreEqual(1, singleAlignment.ProfilesMatrix[0][2]);
            Assert.AreEqual(0, singleAlignment.ProfilesMatrix[0][3]);
            Assert.AreEqual(1, singleAlignment.NumberOfSequences);

            // Profile alignment built from the whole list of sequences.
            IProfileAlignment listAlignment = ProfileAlignment.GenerateProfileAlignment(inputs);

            Assert.AreEqual(1, listAlignment.ProfilesMatrix[0][2]);
            Assert.AreEqual(0, listAlignment.ProfilesMatrix[0][3]);
            Assert.AreEqual(2, listAlignment.NumberOfSequences);

            // Row 5 is the first position where the two inputs differ ('A' vs 'T').
            Assert.AreEqual(0.5, listAlignment.ProfilesMatrix[5][0]);
            Assert.AreEqual(0.5, listAlignment.ProfilesMatrix[5][1]);
            Assert.AreEqual(0, listAlignment.ProfilesMatrix[5][2]);
        }
Exemplo n.º 5
0
        /// <summary>
        /// Merges two profile alignments using the alignment operation arrays produced by
        /// dynamic programming. The two arrays describe the operations to apply to each
        /// original profile; they are handed to Profiles.GenerateProfiles together with the
        /// gap code, and the resulting aligned profiles are stored in a newly created
        /// profile alignment.
        /// </summary>
        /// <param name="profileAlignmentA">first profile alignment</param>
        /// <param name="profileAlignmentB">second profile alignment</param>
        /// <param name="aAligned">aligned integer array generated by dynamic programming</param>
        /// <param name="bAligned">aligned integer array generated by dynamic programming</param>
        /// <param name="gapCode">the gap integer code defined in dynamic programming class</param>
        /// <returns>
        /// A new profile alignment whose sequence count is the sum of the two inputs' counts.
        /// </returns>
        public static IProfileAlignment GenerateProfileAlignment(
            IProfileAlignment profileAlignmentA,
            IProfileAlignment profileAlignmentB,
            int[] aAligned,
            int[] bAligned,
            int gapCode)
        {
            IProfiles alignedProfiles = Profiles.GenerateProfiles(
                profileAlignmentA.ProfilesMatrix,
                profileAlignmentB.ProfilesMatrix,
                profileAlignmentA.NumberOfSequences,
                profileAlignmentB.NumberOfSequences,
                aAligned,
                bAligned,
                gapCode);

            IProfileAlignment merged = new ProfileAlignment();

            merged.NumberOfSequences = profileAlignmentA.NumberOfSequences + profileAlignmentB.NumberOfSequences;
            merged.ProfilesMatrix = alignedProfiles;

            return merged;
        }
Exemplo n.º 6
0
        // Smoke test for NeedlemanWunschProfileAlignerSerial: aligns one hard-coded pair of
        // DNA sequences, then pairs of sequences read from a FASTA file, and prints the
        // e-strings, aligned sequences and scores to the console. It makes no assertions.
        public void TestNeedlemanWunschProfileAligner()
        {
            Console.WriteLine("Number of logical processors: {0}", Environment.ProcessorCount);

            // Register every template symbol, plus the lower-case form of each letter,
            // keyed to the symbol's position in the template.
            ISequence templateSequence     = new Sequence(Alphabets.AmbiguousDNA, "ATGCSWRYKMBVHDN-");
            Dictionary <byte, int> itemSet = new Dictionary <byte, int>();

            for (int i = 0; i < templateSequence.Count; ++i)
            {
                itemSet.Add(templateSequence[i], i);

                if (char.IsLetter((char)templateSequence[i]))
                {
                    itemSet.Add((byte)char.ToLower((char)templateSequence[i]), i);
                }
            }
            Profiles.ItemSet = itemSet;

            SimilarityMatrix similarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.AmbiguousDna);
            int gapOpenPenalty   = -3;
            int gapExtendPenalty = -1;

            IProfileAligner profileAligner = new NeedlemanWunschProfileAlignerSerial(similarityMatrix, ProfileScoreFunctionNames.WeightedInnerProduct,
                                                                                     gapOpenPenalty, gapExtendPenalty, Environment.ProcessorCount);

            ISequence seqA = new Sequence(Alphabets.DNA, "GGGAAAAATCAGATT");
            ISequence seqB = new Sequence(Alphabets.DNA, "GGGAATCAAAATCAG");

            List <ISequence> sequences = new List <ISequence>();

            sequences.Add(seqA);
            sequences.Add(seqB);

            IProfileAlignment profileAlignmentA = ProfileAlignment.GenerateProfileAlignment(sequences[0]);
            IProfileAlignment profileAlignmentB = ProfileAlignment.GenerateProfileAlignment(sequences[1]);

            profileAligner.Align(profileAlignmentA, profileAlignmentB);

            // Convert the aligner output into e-strings and apply them to the original
            // sequences to obtain the gapped sequences.
            List <int> eStringSubtree  = profileAligner.GenerateEString(profileAligner.AlignedA);
            List <int> eStringSubtreeB = profileAligner.GenerateEString(profileAligner.AlignedB);

            List <ISequence> alignedSequences = new List <ISequence>();

            ISequence seq = profileAligner.GenerateSequenceFromEString(eStringSubtree, sequences[0]);

            alignedSequences.Add(seq);
            seq = profileAligner.GenerateSequenceFromEString(eStringSubtreeB, sequences[1]);
            alignedSequences.Add(seq);

            float profileScore = MsaUtils.MultipleAlignmentScoreFunction(alignedSequences, similarityMatrix, gapOpenPenalty, gapExtendPenalty);

            Console.WriteLine("alignment score is: {0}", profileScore);

            Console.WriteLine("the aligned sequences are:");
            for (int i = 0; i < alignedSequences.Count; ++i)
            {
                Console.WriteLine(new string(alignedSequences[i].Select(a => (char)a).ToArray()));
            }

            // Test on case 3: 36 sequences
            string      filepath    = @"\TestUtils\RV11_BBS_allSmall.afa";
            string      filePathObj = Directory.GetCurrentDirectory() + filepath;
            FastAParser parser      = new FastAParser(filePathObj);

            // The parser used to be disposed inside the outer loop below: once per
            // iteration, not at all when the loop body never ran, and never on an
            // exception. The try/finally releases it exactly once on every path.
            try
            {
                IList <ISequence> orgSequences = parser.Parse().ToList();

                sequences = MsaUtils.UnAlign(orgSequences);

                int numberOfSequences = orgSequences.Count;

                Console.WriteLine("Original unaligned sequences are:");
                for (int i = 0; i < numberOfSequences; ++i)
                {
                    Console.WriteLine(">");
                    Console.WriteLine(new string(sequences[i].Select(a => (char)a).ToArray()));
                }

                // NOTE(review): the outer loop starts at index 1, so sequences[0] is never
                // part of a pair - confirm this is intended.
                for (int i = 1; i < numberOfSequences - 1; ++i)
                {
                    for (int j = i + 1; j < numberOfSequences; ++j)
                    {
                        profileAlignmentA = ProfileAlignment.GenerateProfileAlignment(sequences[i]);
                        profileAlignmentB = ProfileAlignment.GenerateProfileAlignment(sequences[j]);

                        // A fresh aligner is created for every pair.
                        profileAligner = new NeedlemanWunschProfileAlignerSerial(similarityMatrix, ProfileScoreFunctionNames.WeightedInnerProduct,
                                                                                 gapOpenPenalty, gapExtendPenalty, Environment.ProcessorCount);
                        profileAligner.Align(profileAlignmentA, profileAlignmentB);

                        eStringSubtree  = profileAligner.GenerateEString(profileAligner.AlignedA);
                        eStringSubtreeB = profileAligner.GenerateEString(profileAligner.AlignedB);

                        Console.WriteLine("Sequences lengths are: {0}-{1}", sequences[i].Count, sequences[j].Count);
                        Console.WriteLine("estring 1:");
                        for (int k = 0; k < eStringSubtree.Count; ++k)
                        {
                            Console.Write("{0}\t", eStringSubtree[k]);
                        }
                        Console.WriteLine("\nestring 2:");
                        for (int k = 0; k < eStringSubtreeB.Count; ++k)
                        {
                            Console.Write("{0}\t", eStringSubtreeB[k]);
                        }

                        alignedSequences = new List <ISequence>();

                        seq = profileAligner.GenerateSequenceFromEString(eStringSubtree, sequences[i]);
                        alignedSequences.Add(seq);
                        seq = profileAligner.GenerateSequenceFromEString(eStringSubtreeB, sequences[j]);
                        alignedSequences.Add(seq);

                        profileScore = MsaUtils.MultipleAlignmentScoreFunction(alignedSequences, similarityMatrix, gapOpenPenalty, gapExtendPenalty);

                        Console.WriteLine("\nalignment score is: {0}", profileScore);

                        Console.WriteLine("the aligned sequences are:");
                        for (int k = 0; k < alignedSequences.Count; ++k)
                        {
                            Console.WriteLine(new string(alignedSequences[k].Select(a => (char)a).ToArray()));
                        }
                    }
                }
            }
            finally
            {
                parser.Dispose();
            }
        }
Exemplo n.º 7
0
        /// <summary>
        /// Merges two profile alignments using the alignment operation arrays produced by
        /// dynamic programming, taking sequence weights into account. The operation arrays,
        /// the gap code and the weights are handed to Profiles.GenerateProfiles, and the
        /// resulting aligned profiles are stored in a newly created profile alignment.
        /// </summary>
        /// <param name="profileAlignmentA">first profile alignment</param>
        /// <param name="profileAlignmentB">second profile alignment</param>
        /// <param name="aAligned">aligned integer array generated by dynamic programming</param>
        /// <param name="bAligned">aligned integer array generated by dynamic programming</param>
        /// <param name="gapCode">the gap integer code defined in dynamic programming class</param>
        /// <param name="weights">the weights of two profileAlignments</param>
        /// <returns>
        /// A new profile alignment whose sequence count is the sum of the two inputs' counts.
        /// </returns>
        public static IProfileAlignment GenerateProfileAlignment(
                IProfileAlignment profileAlignmentA,
                IProfileAlignment profileAlignmentB,
                int[] aAligned,
                int[] bAligned,
                int gapCode,
                float[] weights)
        {
            IProfiles weightedProfiles = Profiles.GenerateProfiles(
                profileAlignmentA.ProfilesMatrix,
                profileAlignmentB.ProfilesMatrix,
                profileAlignmentA.NumberOfSequences,
                profileAlignmentB.NumberOfSequences,
                aAligned,
                bAligned,
                gapCode,
                weights);

            IProfileAlignment merged = new ProfileAlignment();

            merged.NumberOfSequences = profileAlignmentA.NumberOfSequences + profileAlignmentB.NumberOfSequences;
            merged.ProfilesMatrix = weightedProfiles;

            return merged;
        }
        /// <summary>
        /// Aligns two profile alignments using an affine gap penalty (separate gap open and
        /// gap extension costs). The various algorithms in derived classes (NeedlemanWunsch,
        /// SmithWaterman, and PairwiseOverlap) all use this general engine for alignment with
        /// an affine gap penalty.
        /// </summary>
        /// <param name="similarityMatrix">Scoring matrix.</param>
        /// <param name="gapOpenPenalty">Gap open penalty (by convention, use a negative number for this.)</param>
        /// <param name="gapExtensionPenalty">Gap extension penalty (by convention, use a negative number for this.)</param>
        /// <param name="profileAlignmentA">First input profileAlignment</param>
        /// <param name="profileAlignmentB">Second input profileAlignment</param>
        /// <returns>The combined profile alignment, with Score set to the traceback score.</returns>
        /// <exception cref="ArgumentNullException">
        /// Thrown when profileAlignmentA or profileAlignmentB is null.
        /// </exception>
        public IProfileAlignment Align(
            SimilarityMatrix similarityMatrix,
            int gapOpenPenalty,
            int gapExtensionPenalty,
            IProfileAlignment profileAlignmentA,
            IProfileAlignment profileAlignmentB)
        {
            // Fail fast with a descriptive exception, matching AlignSimple, instead of a
            // NullReferenceException part-way through after member state has been changed.
            if (profileAlignmentA == null)
            {
                throw new ArgumentNullException("profileAlignmentA");
            }

            if (profileAlignmentB == null)
            {
                throw new ArgumentNullException("profileAlignmentB");
            }

            _profileAlignmentA = profileAlignmentA;
            _profileAlignmentB = profileAlignmentB;

            ResetSpecificAlgorithmMemberVariables();
            // Set Gap Penalty and Similarity Matrix
            GapOpenCost = gapOpenPenalty;
            GapExtensionCost = gapExtensionPenalty;
            SimilarityMatrix = similarityMatrix;

            ValidateAlignInput(profileAlignmentA, profileAlignmentB);  // throws exception if input not valid

            // Build one index array per profile, sized by the profile's row count
            _a = MsaUtils.CreateIndexArray(profileAlignmentA.ProfilesMatrix.RowSize);
            _b = MsaUtils.CreateIndexArray(profileAlignmentB.ProfilesMatrix.RowSize);

            if (_doCaching)
            {
                _cachingFunction(similarityMatrix, _profileAlignmentA, _profileAlignmentB);
            }

            // Sort profileA
            _indexAs = CachingIndex(_profileAlignmentA);

            FillMatrixAffine();

            //DumpF();  // Writes matrix to application log, used for development and testing
            //DumpAffine(); // Writes matrix to application log in great detail.  Useful only for small cases.

            float optScore = Traceback(out _alignedA, out _alignedB);

            // Combine the two input profiles according to the traceback result; the
            // weighted overload is used only when PAMSAMMultipleSequenceAligner.UseWeights is set.
            IProfileAlignment results = null;

            if (PAMSAMMultipleSequenceAligner.UseWeights)
            {
                results = ProfileAlignment.GenerateProfileAlignment(_profileAlignmentA, _profileAlignmentB, _alignedA, _alignedB, _gapCode, _weights);
            }
            else
            {
                results = ProfileAlignment.GenerateProfileAlignment(_profileAlignmentA, _profileAlignmentB, _alignedA, _alignedB, _gapCode);
            }
            results.Score = optScore;

            return results;
        }
 /// <summary>
 /// Validates the inputs of a profile-profile alignment: both profile matrices must have
 /// the same column size. A positive gap open or gap extension penalty is not rejected,
 /// but a warning is written to the application log.
 /// </summary>
 /// <param name="aInput">First input profile alignment.</param>
 /// <param name="bInput">Second input profile alignment.</param>
 /// <exception cref="ArgumentException">
 /// Thrown when the two profile matrices have different column sizes.
 /// </exception>
 protected void ValidateAlignInput(IProfileAlignment aInput, IProfileAlignment bInput)
 {
     if (aInput.ProfilesMatrix.ColumnSize != bInput.ProfilesMatrix.ColumnSize)
     {
         // A specific exception type instead of the bare System.Exception; it still
         // derives from Exception, so existing catch (Exception) handlers keep working.
         throw new ArgumentException("Input profiles have different column sizes");
     }
     // Warning if gap penalty > 0
     if (_gapOpenPenalty > 0)
     {
         ApplicationLog.WriteLine("Gap Open Penalty {0} > 0, possible error", _gapOpenPenalty);
     }
     if (_gapExtensionPenalty > 0)
     {
         ApplicationLog.WriteLine("Gap Extension Penalty {0} > 0, possible error", _gapExtensionPenalty);
     }
 }
 /// <summary>
 /// Builds one index array per row of the profile matrix by running
 /// MsaUtils.QuickSortM over the full column range of that row.
 /// </summary>
 /// <param name="profileAlignment">profileAlignment</param>
 /// <returns>A jagged array holding one index array for each profile row.</returns>
 /// <exception cref="ArgumentNullException">Thrown when profileAlignment is null.</exception>
 public int[][] CachingIndex(IProfileAlignment profileAlignment)
 {
     if (profileAlignment == null)
     {
         throw new ArgumentNullException("profileAlignment");
     }

     int rowSize = profileAlignment.ProfilesMatrix.RowSize;
     int colSize = profileAlignment.ProfilesMatrix.ColumnSize;

     // Named as a plain local so it no longer shadows the _indexAs member field.
     int[][] sortedIndexes = new int[rowSize][];

     for (int i = 0; i < rowSize; ++i)
     {
         MsaUtils.QuickSortM(profileAlignment.ProfilesMatrix[i], out sortedIndexes[i],
                         0, colSize - 1);
     }
     return sortedIndexes;
 }
 /// <summary>
 /// Aligns two profiles with an affine gap penalty, using the similarity matrix and
 /// the gap open / gap extension penalties currently stored on this aligner.
 /// </summary>
 /// <param name="profileA">first input profile</param>
 /// <param name="profileB">second input profile</param>
 /// <returns>The profile alignment produced by the five-argument Align overload.</returns>
 public IProfileAlignment Align(IProfileAlignment profileA, IProfileAlignment profileB)
 {
     return Align(_similarityMatrix, _gapOpenPenalty, _gapExtensionPenalty, profileA, profileB);
 }
 /// <summary>
 /// Caches the product of the similarity matrix with every row of profile B.
 /// For each row and each symbol index i, the cached value is the sum over j of
 /// profileB[row][j] * similarityMatrix[i, j]. The result is stored in _cachedMatrix.
 /// </summary>
 /// <param name="similarityMatrix">similarity matrix</param>
 /// <param name="profileAlignmentA">profile alignment A (not read by this method)</param>
 /// <param name="profileAlignmentB">profile alignment B</param>
 public void CachingWeightedInnerProduct(SimilarityMatrix similarityMatrix, IProfileAlignment profileAlignmentA, IProfileAlignment profileAlignmentB)
 {
     int rowCount    = profileAlignmentB.ProfilesMatrix.RowSize;
     int symbolCount = profileAlignmentB.ProfilesMatrix.ColumnSize;

     _cachedMatrix = new float[rowCount][];

     for (int row = 0; row < rowCount; ++row)
     {
         // New float arrays start zero-filled, so each cell can be accumulated in place.
         float[] cachedRow = new float[symbolCount];
         _cachedMatrix[row] = cachedRow;

         for (int target = 0; target < symbolCount; ++target)
         {
             for (int source = 0; source < symbolCount; ++source)
             {
                 cachedRow[target] += profileAlignmentB.ProfilesMatrix[row][source] * similarityMatrix[target, source];
             }
         }
     }
 }
        /// <summary>
        /// Aligns two profile alignments using a single gap penalty. The various algorithms
        /// in derived classes (NeedlemanWunsch, SmithWaterman, and PairwiseOverlap) all use
        /// this general engine for alignment with a single gap penalty.
        /// </summary>
        /// <param name="similarityMatrix">Scoring matrix.</param>
        /// <param name="gapPenalty">Gap penalty (by convention, use a negative number for this.)</param>
        /// <param name="profileAlignmentA">First input profileAlignment.</param>
        /// <param name="profileAlignmentB">Second input profileAlignment.</param>
        /// <returns>The combined profile alignment, with Score set to the traceback score.</returns>
        /// <exception cref="ArgumentNullException">
        /// Thrown when profileAlignmentA or profileAlignmentB is null.
        /// </exception>
        public IProfileAlignment AlignSimple(
            SimilarityMatrix similarityMatrix,
            int gapPenalty,
            IProfileAlignment profileAlignmentA,
            IProfileAlignment profileAlignmentB)
        {
            if (profileAlignmentA == null)
            {
                throw new ArgumentNullException("profileAlignmentA");
            }
            if (profileAlignmentB == null)
            {
                throw new ArgumentNullException("profileAlignmentB");
            }

            _profileAlignmentA = profileAlignmentA;
            _profileAlignmentB = profileAlignmentB;
            ResetSpecificAlgorithmMemberVariables();

            // Only the gap open cost is set; the gap extension cost plays no part
            // in the simple gap penalty.
            GapOpenCost = gapPenalty;
            SimilarityMatrix = similarityMatrix;

            // Throws if the two profiles are not compatible.
            ValidateAlignInput(profileAlignmentA, profileAlignmentB);

            // One index array per profile, sized by the profile's row count.
            _a = MsaUtils.CreateIndexArray(profileAlignmentA.ProfilesMatrix.RowSize);
            _b = MsaUtils.CreateIndexArray(profileAlignmentB.ProfilesMatrix.RowSize);

            // Either run the configured caching function or build the index caches
            // for both profiles.
            if (_doCaching)
            {
                _cachingFunction(similarityMatrix, profileAlignmentA, profileAlignmentB);
            }
            else
            {
                _indexAs = CachingIndex(profileAlignmentA);
                _indexBs = CachingIndex(profileAlignmentB);
            }

            FillMatrixSimple();

            float bestScore = Traceback(out _alignedA, out _alignedB);

            // Combine the input profiles according to the traceback result; the weighted
            // overload is used only when PAMSAMMultipleSequenceAligner.UseWeights is set.
            IProfileAlignment combined = PAMSAMMultipleSequenceAligner.UseWeights
                ? ProfileAlignment.GenerateProfileAlignment(profileAlignmentA, profileAlignmentB, _alignedA, _alignedB, _gapCode, _weights)
                : ProfileAlignment.GenerateProfileAlignment(profileAlignmentA, profileAlignmentB, _alignedA, _alignedB, _gapCode);

            ProfileAlignment results = (ProfileAlignment)combined;
            results.Score = bestScore;

            return results;
        }
 /// <summary>
 /// Aligns two profiles with a constant gap penalty, using the similarity matrix and
 /// the gap open penalty currently stored on this aligner.
 /// </summary>
 /// <param name="profileA">First input profile.</param>
 /// <param name="profileB">Second input profile.</param>
 /// <returns>Object containing the alignment.</returns>
 public IProfileAlignment AlignSimple(IProfileAlignment profileA, IProfileAlignment profileB)
 {
     return (ProfileAlignment)AlignSimple(_similarityMatrix, _gapOpenPenalty, profileA, profileB);
 }
        /// <summary>
        /// Builds one index array per row of the profile matrix by running
        /// MsaUtils.QuickSortM over the full column range of that row. Rows are
        /// processed with Parallel.For using PAMSAMMultipleSequenceAligner.ParallelOption.
        /// </summary>
        /// <param name="profileAlignment">profileAlignment</param>
        /// <returns>A jagged array holding one index array for each profile row.</returns>
        /// <exception cref="ArgumentNullException">Thrown when profileAlignment is null.</exception>
        public int[][] CachingIndex(IProfileAlignment profileAlignment)
        {
            if (profileAlignment == null)
            {
                throw new ArgumentNullException("profileAlignment");
            }

            int rowCount   = profileAlignment.ProfilesMatrix.RowSize;
            int lastColumn = profileAlignment.ProfilesMatrix.ColumnSize - 1;
            int[][] sortedIndexes = new int[rowCount][];

            // Each iteration writes only its own slot of the result array.
            Parallel.For(0, rowCount, PAMSAMMultipleSequenceAligner.ParallelOption, row =>
            {
                MsaUtils.QuickSortM(profileAlignment.ProfilesMatrix[row], out sortedIndexes[row], 0, lastColumn);
            });

            return sortedIndexes;
        }
Exemplo n.º 16
0
        // Smoke test for NeedlemanWunschProfileAlignerSerial: aligns one hard-coded pair of
        // DNA sequences, then pairs of sequences read from a FASTA file, and prints the
        // e-strings, aligned sequences and scores to the console.
        // NOTE(review): this test makes no assertions. The two "expected" sequences that
        // used to be built here were never compared with anything and have been removed.
        public void TestNeedlemanWunschProfileAligner()
        {
            Console.WriteLine("Number of logical processors: {0}", Environment.ProcessorCount);

            // Register each template symbol, keyed to its position, as the profile item set.
            ISequence templateSequence = new Sequence(Alphabets.DNA, "ATGCSWRYKMBVHDN-");
            Dictionary <ISequenceItem, int> itemSet = new Dictionary <ISequenceItem, int>();

            for (int i = 0; i < templateSequence.Count; ++i)
            {
                itemSet.Add(templateSequence[i], i);
            }
            Profiles.ItemSet = itemSet;

            SimilarityMatrix similarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.AmbiguousDna);
            int gapOpenPenalty   = -3;
            int gapExtendPenalty = -1;

            IProfileAligner profileAligner = new NeedlemanWunschProfileAlignerSerial(similarityMatrix, ProfileScoreFunctionNames.WeightedInnerProduct,
                                                                                     gapOpenPenalty, gapExtendPenalty, Environment.ProcessorCount);

            ISequence seqA = new Sequence(Alphabets.DNA, "GGGAAAAATCAGATT");
            ISequence seqB = new Sequence(Alphabets.DNA, "GGGAATCAAAATCAG");

            List <ISequence> sequences = new List <ISequence>();

            sequences.Add(seqA);
            sequences.Add(seqB);

            IProfileAlignment profileAlignmentA = ProfileAlignment.GenerateProfileAlignment(sequences[0]);
            IProfileAlignment profileAlignmentB = ProfileAlignment.GenerateProfileAlignment(sequences[1]);

            profileAligner.Align(profileAlignmentA, profileAlignmentB);

            // Convert the aligner output into e-strings and apply them to the original
            // sequences to obtain the gapped sequences.
            List <int> eStringSubtree  = profileAligner.GenerateEString(profileAligner.AlignedA);
            List <int> eStringSubtreeB = profileAligner.GenerateEString(profileAligner.AlignedB);

            List <ISequence> alignedSequences = new List <ISequence>();

            ISequence seq = profileAligner.GenerateSequenceFromEString(eStringSubtree, sequences[0]);

            alignedSequences.Add(seq);
            seq = profileAligner.GenerateSequenceFromEString(eStringSubtreeB, sequences[1]);
            alignedSequences.Add(seq);

            float profileScore = MsaUtils.MultipleAlignmentScoreFunction(alignedSequences, similarityMatrix, gapOpenPenalty, gapExtendPenalty);

            Console.WriteLine("alignment score is: {0}", profileScore);

            Console.WriteLine("the aligned sequences are:");
            for (int i = 0; i < alignedSequences.Count; ++i)
            {
                Console.WriteLine(alignedSequences[i].ToString());
            }

            // Test on case 3: 36 sequences
            ISequenceParser   parser       = new FastaParser();
            string            filepath     = @"testdata\FASTA\RV11_BBS_all.afa";
            IList <ISequence> orgSequences = parser.Parse(filepath);

            sequences = MsaUtils.UnAlign(orgSequences);

            int numberOfSequences = orgSequences.Count;

            Console.WriteLine("Original unaligned sequences are:");
            for (int i = 0; i < numberOfSequences; ++i)
            {
                Console.WriteLine(">");
                Console.WriteLine(sequences[i].ToString());
            }

            // NOTE(review): the outer loop starts at index 1, so sequences[0] is never
            // part of a pair - confirm this is intended.
            for (int i = 1; i < numberOfSequences - 1; ++i)
            {
                for (int j = i + 1; j < numberOfSequences; ++j)
                {
                    profileAlignmentA = ProfileAlignment.GenerateProfileAlignment(sequences[i]);
                    profileAlignmentB = ProfileAlignment.GenerateProfileAlignment(sequences[j]);

                    // A fresh aligner is created for every pair.
                    profileAligner = new NeedlemanWunschProfileAlignerSerial(similarityMatrix, ProfileScoreFunctionNames.WeightedInnerProduct,
                                                                             gapOpenPenalty, gapExtendPenalty, Environment.ProcessorCount);
                    profileAligner.Align(profileAlignmentA, profileAlignmentB);

                    eStringSubtree  = profileAligner.GenerateEString(profileAligner.AlignedA);
                    eStringSubtreeB = profileAligner.GenerateEString(profileAligner.AlignedB);

                    Console.WriteLine("Sequences lengths are: {0}-{1}", sequences[i].Count, sequences[j].Count);
                    Console.WriteLine("estring 1:");
                    for (int k = 0; k < eStringSubtree.Count; ++k)
                    {
                        Console.Write("{0}\t", eStringSubtree[k]);
                    }
                    Console.WriteLine("\nestring 2:");
                    for (int k = 0; k < eStringSubtreeB.Count; ++k)
                    {
                        Console.Write("{0}\t", eStringSubtreeB[k]);
                    }

                    alignedSequences = new List <ISequence>();

                    seq = profileAligner.GenerateSequenceFromEString(eStringSubtree, sequences[i]);
                    alignedSequences.Add(seq);
                    seq = profileAligner.GenerateSequenceFromEString(eStringSubtreeB, sequences[j]);
                    alignedSequences.Add(seq);

                    profileScore = MsaUtils.MultipleAlignmentScoreFunction(alignedSequences, similarityMatrix, gapOpenPenalty, gapExtendPenalty);

                    Console.WriteLine("\nalignment score is: {0}", profileScore);

                    Console.WriteLine("the aligned sequences are:");
                    for (int k = 0; k < alignedSequences.Count; ++k)
                    {
                        Console.WriteLine(alignedSequences[k].ToString());
                    }
                }
            }
        }