public void TestParallelDynamicProgramming()
        {
            ISequence templateSequence     = new Sequence(Alphabets.AmbiguousDNA, "ATGCSWRYKMBVHDN-");
            Dictionary <byte, int> itemSet = new Dictionary <byte, int>();

            for (int i = 0; i < templateSequence.Count; ++i)
            {
                itemSet.Add(templateSequence[i], i);

                if (char.IsLetter((char)templateSequence[i]))
                {
                    itemSet.Add((byte)char.ToLower((char)templateSequence[i]), i);
                }
            }
            Profiles.ItemSet = itemSet;

            NeedlemanWunschProfileAlignerSerial profileAligner = new NeedlemanWunschProfileAlignerSerial();
            SimilarityMatrix similarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.AmbiguousDna);
            int gapOpenPenalty   = -8;
            int gapExtendPenalty = -1;

            profileAligner.SimilarityMatrix = similarityMatrix;
            profileAligner.GapOpenCost      = gapOpenPenalty;
            profileAligner.GapExtensionCost = gapExtendPenalty;



            int numberOfRows = 8, numberOfCols = 7;
            int numberOfPartitions = 4;

            int startPosition = 1, endPosition = 100; Dictionary <int, List <int[]> > parallelIndexMaster = profileAligner.ParallelIndexMasterGenerator(numberOfRows, numberOfCols, numberOfPartitions);

            foreach (var pair in parallelIndexMaster)
            {
                Console.Write("{0} ->: ", pair.Key);
                for (int i = 0; i < pair.Value.Count; ++i)
                {
                    Console.WriteLine("iteration: {0}: {1}-{2};", i, pair.Value[i][0], pair.Value[i][1]);
                }
            }

            for (int partitionIndex = 0; partitionIndex < numberOfPartitions; ++partitionIndex)
            {
                int[] indexPositions = profileAligner.IndexLocator(startPosition, endPosition, numberOfPartitions, partitionIndex);
                Console.Write("Index number: {0}: {1}-{2}", partitionIndex, indexPositions[0], indexPositions[1]);
            }

            int numberOfIterations = numberOfPartitions * 2 - 1;

            for (int i = 0; i < numberOfIterations; ++i)
            {
                foreach (var pair in parallelIndexMaster)
                {
                    List <int[]> indexPositions = parallelIndexMaster[pair.Key];

                    // Parallel in anti-diagonal direction
                    Parallel.ForEach(indexPositions, indexPosition =>
                    {
                        int[] rowPositions = profileAligner.IndexLocator(1, 100, numberOfPartitions, indexPosition[0]);
                        int[] colPositions = profileAligner.IndexLocator(1, 200, numberOfPartitions, indexPosition[0]);
                        Console.Write("row positions: {0}-{1}", rowPositions[0], rowPositions[1]);
                        Console.Write("col positions: {0}-{1}", colPositions[0], colPositions[1]);
                    });
                }
            }
        }
Esempio n. 2
0
        public void TestNeedlemanWunschProfileAligner()
        {
            Console.WriteLine("Number of logical processors: {0}", Environment.ProcessorCount);

            ISequence templateSequence     = new Sequence(Alphabets.AmbiguousDNA, "ATGCSWRYKMBVHDN-");
            Dictionary <byte, int> itemSet = new Dictionary <byte, int>();

            for (int i = 0; i < templateSequence.Count; ++i)
            {
                itemSet.Add(templateSequence[i], i);

                if (char.IsLetter((char)templateSequence[i]))
                {
                    itemSet.Add((byte)char.ToLower((char)templateSequence[i]), i);
                }
            }
            Profiles.ItemSet = itemSet;



            SimilarityMatrix similarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.AmbiguousDna);
            int gapOpenPenalty   = -3;
            int gapExtendPenalty = -1;

            IProfileAligner profileAligner = new NeedlemanWunschProfileAlignerSerial(similarityMatrix, ProfileScoreFunctionNames.WeightedInnerProduct,
                                                                                     gapOpenPenalty, gapExtendPenalty, Environment.ProcessorCount);

            ISequence seqA = new Sequence(Alphabets.DNA, "GGGAAAAATCAGATT");
            ISequence seqB = new Sequence(Alphabets.DNA, "GGGAATCAAAATCAG");

            List <ISequence> sequences = new List <ISequence>();

            sequences.Add(seqA);
            sequences.Add(seqB);

            IProfileAlignment profileAlignmentA = ProfileAlignment.GenerateProfileAlignment(sequences[0]);
            IProfileAlignment profileAlignmentB = ProfileAlignment.GenerateProfileAlignment(sequences[1]);

            profileAligner.Align(profileAlignmentA, profileAlignmentB);


            List <int> eStringSubtree  = profileAligner.GenerateEString(profileAligner.AlignedA);
            List <int> eStringSubtreeB = profileAligner.GenerateEString(profileAligner.AlignedB);

            List <ISequence> alignedSequences = new List <ISequence>();

            ISequence seq = profileAligner.GenerateSequenceFromEString(eStringSubtree, sequences[0]);

            alignedSequences.Add(seq);
            seq = profileAligner.GenerateSequenceFromEString(eStringSubtreeB, sequences[1]);
            alignedSequences.Add(seq);

            float profileScore = MsaUtils.MultipleAlignmentScoreFunction(alignedSequences, similarityMatrix, gapOpenPenalty, gapExtendPenalty);

            Console.WriteLine("alignment score is: {0}", profileScore);

            Console.WriteLine("the aligned sequences are:");
            for (int i = 0; i < alignedSequences.Count; ++i)
            {
                Console.WriteLine(new string(alignedSequences[i].Select(a => (char)a).ToArray()));
            }

            // Test on case 3: 36 sequences
            string            filepath     = @"\TestUtils\RV11_BBS_allSmall.afa";
            string            filePathObj  = Directory.GetCurrentDirectory() + filepath;
            FastAParser       parser       = new FastAParser(filePathObj);
            IList <ISequence> orgSequences = parser.Parse().ToList();

            sequences = MsaUtils.UnAlign(orgSequences);

            int numberOfSequences = orgSequences.Count;

            Console.WriteLine("Original unaligned sequences are:");
            for (int i = 0; i < numberOfSequences; ++i)
            {
                Console.WriteLine(">");
                Console.WriteLine(new string(sequences[i].Select(a => (char)a).ToArray()));
            }

            for (int i = 1; i < numberOfSequences - 1; ++i)
            {
                for (int j = i + 1; j < numberOfSequences; ++j)
                {
                    profileAlignmentA = ProfileAlignment.GenerateProfileAlignment(sequences[i]);
                    profileAlignmentB = ProfileAlignment.GenerateProfileAlignment(sequences[j]);

                    profileAligner = new NeedlemanWunschProfileAlignerSerial(similarityMatrix, ProfileScoreFunctionNames.WeightedInnerProduct,
                                                                             gapOpenPenalty, gapExtendPenalty, Environment.ProcessorCount);
                    profileAligner.Align(profileAlignmentA, profileAlignmentB);

                    eStringSubtree  = profileAligner.GenerateEString(profileAligner.AlignedA);
                    eStringSubtreeB = profileAligner.GenerateEString(profileAligner.AlignedB);

                    Console.WriteLine("Sequences lengths are: {0}-{1}", sequences[i].Count, sequences[j].Count);
                    Console.WriteLine("estring 1:");
                    for (int k = 0; k < eStringSubtree.Count; ++k)
                    {
                        Console.Write("{0}\t", eStringSubtree[k]);
                    }
                    Console.WriteLine("\nestring 2:");
                    for (int k = 0; k < eStringSubtreeB.Count; ++k)
                    {
                        Console.Write("{0}\t", eStringSubtreeB[k]);
                    }

                    alignedSequences = new List <ISequence>();

                    seq = profileAligner.GenerateSequenceFromEString(eStringSubtree, sequences[i]);
                    alignedSequences.Add(seq);
                    seq = profileAligner.GenerateSequenceFromEString(eStringSubtreeB, sequences[j]);
                    alignedSequences.Add(seq);

                    profileScore = MsaUtils.MultipleAlignmentScoreFunction(alignedSequences, similarityMatrix, gapOpenPenalty, gapExtendPenalty);

                    Console.WriteLine("\nalignment score is: {0}", profileScore);

                    Console.WriteLine("the aligned sequences are:");
                    for (int k = 0; k < alignedSequences.Count; ++k)
                    {
                        Console.WriteLine(new string(alignedSequences[k].Select(a => (char)a).ToArray()));
                    }
                }
                ((FastAParser)parser).Dispose();
            }
        }
Esempio n. 3
0
        public void TestNeedlemanWunschProfileAligner()
        {
            Console.WriteLine("Number of logical processors: {0}", Environment.ProcessorCount);

            ISequence templateSequence = new Sequence(Alphabets.DNA, "ATGCSWRYKMBVHDN-");
            Dictionary <ISequenceItem, int> itemSet = new Dictionary <ISequenceItem, int>();

            for (int i = 0; i < templateSequence.Count; ++i)
            {
                itemSet.Add(templateSequence[i], i);
            }
            Profiles.ItemSet = itemSet;



            SimilarityMatrix similarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.AmbiguousDna);
            int gapOpenPenalty   = -3;
            int gapExtendPenalty = -1;

            IProfileAligner profileAligner = new NeedlemanWunschProfileAlignerSerial(similarityMatrix, ProfileScoreFunctionNames.WeightedInnerProduct,
                                                                                     gapOpenPenalty, gapExtendPenalty, Environment.ProcessorCount);

            ISequence seqA = new Sequence(Alphabets.DNA, "GGGAAAAATCAGATT");
            ISequence seqB = new Sequence(Alphabets.DNA, "GGGAATCAAAATCAG");

            List <ISequence> sequences = new List <ISequence>();

            sequences.Add(seqA);
            sequences.Add(seqB);

            IProfileAlignment profileAlignmentA = ProfileAlignment.GenerateProfileAlignment(sequences[0]);
            IProfileAlignment profileAlignmentB = ProfileAlignment.GenerateProfileAlignment(sequences[1]);

            profileAligner.Align(profileAlignmentA, profileAlignmentB);


            List <int> eStringSubtree  = profileAligner.GenerateEString(profileAligner.AlignedA);
            List <int> eStringSubtreeB = profileAligner.GenerateEString(profileAligner.AlignedB);

            List <ISequence> alignedSequences = new List <ISequence>();

            ISequence seq = profileAligner.GenerateSequenceFromEString(eStringSubtree, sequences[0]);

            alignedSequences.Add(seq);
            seq = profileAligner.GenerateSequenceFromEString(eStringSubtreeB, sequences[1]);
            alignedSequences.Add(seq);

            float profileScore = MsaUtils.MultipleAlignmentScoreFunction(alignedSequences, similarityMatrix, gapOpenPenalty, gapExtendPenalty);

            Console.WriteLine("alignment score is: {0}", profileScore);

            Console.WriteLine("the aligned sequences are:");
            for (int i = 0; i < alignedSequences.Count; ++i)
            {
                Console.WriteLine(alignedSequences[i].ToString());
            }

            ISequence expectedSeqA = new Sequence(Alphabets.DNA, "GGGAA---AAATCAGATT");
            ISequence expectedSeqB = new Sequence(Alphabets.DNA, "GGGAATCAAAATCAG---");

            // Test on case 3: 36 sequences
            ISequenceParser   parser       = new FastaParser();
            string            filepath     = @"testdata\FASTA\RV11_BBS_all.afa";
            IList <ISequence> orgSequences = parser.Parse(filepath);

            sequences = MsaUtils.UnAlign(orgSequences);

            int numberOfSequences = orgSequences.Count;

            Console.WriteLine("Original unaligned sequences are:");
            for (int i = 0; i < numberOfSequences; ++i)
            {
                Console.WriteLine(">");
                Console.WriteLine(sequences[i].ToString());
            }

            for (int i = 1; i < numberOfSequences - 1; ++i)
            {
                for (int j = i + 1; j < numberOfSequences; ++j)
                {
                    profileAlignmentA = ProfileAlignment.GenerateProfileAlignment(sequences[i]);
                    profileAlignmentB = ProfileAlignment.GenerateProfileAlignment(sequences[j]);

                    profileAligner = new NeedlemanWunschProfileAlignerSerial(similarityMatrix, ProfileScoreFunctionNames.WeightedInnerProduct,
                                                                             gapOpenPenalty, gapExtendPenalty, Environment.ProcessorCount);
                    profileAligner.Align(profileAlignmentA, profileAlignmentB);

                    eStringSubtree  = profileAligner.GenerateEString(profileAligner.AlignedA);
                    eStringSubtreeB = profileAligner.GenerateEString(profileAligner.AlignedB);

                    Console.WriteLine("Sequences lengths are: {0}-{1}", sequences[i].Count, sequences[j].Count);
                    Console.WriteLine("estring 1:");
                    for (int k = 0; k < eStringSubtree.Count; ++k)
                    {
                        Console.Write("{0}\t", eStringSubtree[k]);
                    }
                    Console.WriteLine("\nestring 2:");
                    for (int k = 0; k < eStringSubtreeB.Count; ++k)
                    {
                        Console.Write("{0}\t", eStringSubtreeB[k]);
                    }

                    alignedSequences = new List <ISequence>();

                    seq = profileAligner.GenerateSequenceFromEString(eStringSubtree, sequences[i]);
                    alignedSequences.Add(seq);
                    seq = profileAligner.GenerateSequenceFromEString(eStringSubtreeB, sequences[j]);
                    alignedSequences.Add(seq);

                    profileScore = MsaUtils.MultipleAlignmentScoreFunction(alignedSequences, similarityMatrix, gapOpenPenalty, gapExtendPenalty);

                    Console.WriteLine("\nalignment score is: {0}", profileScore);

                    Console.WriteLine("the aligned sequences are:");
                    for (int k = 0; k < alignedSequences.Count; ++k)
                    {
                        Console.WriteLine(alignedSequences[k].ToString());
                    }
                }
            }
        }