Esempio n. 1
0
        /// <summary>
        /// Aligns the leading characters of two gene sequences with an <c>EditDistance</c> helper and
        /// bundles the score and both alignment strings into a result object.
        /// </summary>
        /// <param name="sequenceA">the first sequence</param>
        /// <param name="sequenceB">the second sequence; its length may differ from that of the first</param>
        /// <param name="banded">true if the alignment should be band limited</param>
        /// <returns>
        /// A Result holding the alignment score and the two alignment strings. In banded mode, when the
        /// truncated lengths differ by more than Bandwidth, the Result holds int.MaxValue and
        /// "No Alignment Possible" for both strings.
        /// </returns>
        public ResultTable.Result Align_And_Extract(GeneSequence sequenceA, GeneSequence sequenceB, bool banded)
        {
            ResultTable.Result outcome = new ResultTable.Result();

            // At most MaxCharactersToAlign characters of each sequence take part in the alignment.
            int lengthA = Math.Min(MaxCharactersToAlign, sequenceA.Sequence.Length);
            int lengthB = Math.Min(MaxCharactersToAlign, sequenceB.Sequence.Length);

            // Banded mode gives up before any work is done when the length gap exceeds the bandwidth.
            if (banded && Math.Abs(lengthB - lengthA) > Bandwidth)
            {
                outcome.Update(int.MaxValue, "No Alignment Possible", "No Alignment Possible");
                return outcome;
            }

            EditDistance solver = new EditDistance(sequenceA.Sequence.Substring(0, lengthA), sequenceB.Sequence.Substring(0, lengthB));
            string[] aligned;

            if (banded)
            {
                solver.setupBanded();
                aligned = solver.bandedResults();
            }
            else
            {
                solver.setupUnbanded();
                aligned = solver.results();
            }

            int cost = solver.value();

            outcome.Update(cost, aligned[0], aligned[1]);
            return outcome;
        }
Esempio n. 2
0
        /// <summary>
        /// Computes and displays a result for every cell (x, y) with y &gt;= x of the result table.
        /// Diagonal cells (a sequence against itself) are filled directly with -3 per compared character;
        /// all other cells are delegated to <c>PairWiseAlign.Align_And_Extract</c>.
        /// </summary>
        /// <remarks>
        /// The text of bandlengthBox must be a valid integer; int.Parse throws otherwise.
        /// </remarks>
        private void fillMatrix()
        {
            int alignLen = int.Parse(bandlengthBox.Text);

            PairWiseAlign processor = new PairWiseAlign(alignLen);

            // Read the character limit from the same aligner that fills the off-diagonal cells, so the
            // diagonal shortcut and the computed alignments always agree on how many characters are compared.
            // NOTE(review): presumably PairWiseAlign(int) configures the value returned here — confirm.
            int maxCharacters = processor.getMaxhCharactersToAlign();

            for (int x = 0; x < NUMBER_OF_SEQUENCES; ++x)
            {
                for (int y = x; y < NUMBER_OF_SEQUENCES; ++y)
                {
                    ResultTable.Result result;

                    if (x == y)
                    {
                        // A sequence aligned with itself matches everywhere: -3 per character, no gaps.
                        int    length    = Math.Min(m_sequences[x].Sequence.Length, maxCharacters);
                        string alignment = m_sequences[x].Sequence.Substring(0, length);

                        result = new ResultTable.Result();
                        result.Update(-3 * length, alignment, alignment);
                    }
                    else
                    {
                        result = processor.Align_And_Extract(m_sequences[x], m_sequences[y], bandCheckBox.Checked);                 // ********* hook to the student's code
                    }

                    m_resultTable.AddResult(x, y, result);
                    m_resultTable.DisplayResult(x, y);
                }
            }
        }
Esempio n. 3
0
        /////////////////////////////////////////////////////////////////////////////////////////////////////////////////
        /////////////////////////////////////////////////////////////////////////////////////////////////////////////////
        /////////////////////////////////////////////////////////////////////////////////////////////////////////////////
        /// <summary>
        /// Aligns two gene sequences by delegating to either the banded or the unrestricted helper and
        /// bundles whatever score and alignment strings the helper leaves behind into a result object.
        /// </summary>
        /// <param name="sequenceA">the first sequence</param>
        /// <param name="sequenceB">the second sequence; its length may differ from that of the first</param>
        /// <param name="banded">true if the alignment should be band limited</param>
        /// <returns>a Result holding the alignment score and the two alignment strings</returns>
        public ResultTable.Result Align_And_Extract(GeneSequence sequenceA, GeneSequence sequenceB, bool banded)
        {
            ResultTable.Result result = new ResultTable.Result();

            // Defaults handed to the helper by reference: a zero score and two empty alignment strings.
            int      score     = 0;
            string[] alignment = new string[] { "", "" };

            if (banded)
            {
                bandedAlgorithm(ref score, ref alignment, ref sequenceA, ref sequenceB);
            }
            else
            {
                unrestrictedAlgorithm(ref score, ref alignment, ref sequenceA, ref sequenceB);
            }

            result.Update(score, alignment[0], alignment[1]);
            return result;
        }
Esempio n. 4
0
        /// <summary>
        /// Aligns two gene sequences using a cost matrix and a back-pointer matrix that are filled by
        /// the banded or unrestricted helper, then returns the score and the alignment strings
        /// (each cut to at most 100 characters for display).
        /// </summary>
        /// <param name="sequenceA">the first sequence (rows of the matrices)</param>
        /// <param name="sequenceB">the second sequence (columns of the matrices); its length may differ</param>
        /// <param name="banded">true if the alignment should be band limited</param>
        /// <returns>a Result holding the score read from the last matrix cell and the two alignment strings</returns>
        public ResultTable.Result Align_And_Extract(GeneSequence sequenceA, GeneSequence sequenceB, bool banded)
        {
            ResultTable.Result result    = new ResultTable.Result();
            string[]           alignment = new string[] { "", "" };

            // Upper limit on each matrix dimension: a fixed 15001 when banded, MaxCharactersToAlign otherwise.
            int cap = banded ? 15001 : MaxCharactersToAlign;

            // One row/column more than the sequence length, clamped to the cap.
            int rows = sequenceA.Sequence.Length + 1;
            if (cap < rows)
            {
                rows = cap;
            }
            int cols = sequenceB.Sequence.Length + 1;
            if (cap < cols)
            {
                cols = cap;
            }

            // matrix holds the costs, prev records the path used to rebuild the alignment.
            int[,] matrix = new int[rows, cols];
            int[,] prev   = new int[rows, cols];
            initializeMatrices(matrix, prev, rows, cols);

            if (banded)
            {
                bandedAlg(matrix, prev, rows, cols, sequenceA, sequenceB);
            }
            else
            {
                unrestricted(matrix, prev, rows, cols, sequenceA, sequenceB);
            }

            // The final score sits in the bottom-right cell.
            int score = matrix[rows - 1, cols - 1];
            findAlignments(alignment, prev, rows, cols, score, sequenceA.Sequence, sequenceB.Sequence);

            // Keep only the first 100 characters of each alignment string for display.
            for (int k = 0; k < alignment.Length; k++)
            {
                if (alignment[k].Length > 100)
                {
                    alignment[k] = alignment[k].Substring(0, 100);
                }
            }

            result.Update(score, alignment[0], alignment[1]);
            return result;
        }
Esempio n. 5
0
        /// <summary>
        /// Aligns the leading characters of two gene sequences through a <c>DynamicProgramming</c>
        /// instance and bundles its score and result strings into a result object.
        /// </summary>
        /// <param name="cell">value forwarded unchanged to the DynamicProgramming constructor</param>
        /// <param name="sequenceA">the first sequence</param>
        /// <param name="sequenceB">the second sequence; its length may differ from that of the first</param>
        /// <param name="banded">true if the alignment should be band limited</param>
        /// <returns>a Result holding the alignment score and the two alignment strings</returns>
        public ResultTable.Result Align_And_Extract(Tuple<int,int> cell, GeneSequence sequenceA, GeneSequence sequenceB, bool banded)
        {
            ResultTable.Result result = new ResultTable.Result();

            // Only the first MaxCharactersToAlign characters (or the whole sequence when shorter) are aligned.
            int takeA = Math.Min(sequenceA.Sequence.Length, MaxCharactersToAlign);
            int takeB = Math.Min(sequenceB.Sequence.Length, MaxCharactersToAlign);

            DynamicProgramming solver = new DynamicProgramming(
                cell,
                sequenceA.Sequence.Substring(0, takeA),
                sequenceB.Sequence.Substring(0, takeB),
                banded);

            result.Update(solver.getScore(), solver.getResultA(), solver.getResultB());
            return result;
        }
Esempio n. 6
0
        /// <summary>
        /// Computes the alignment cost of two gene sequences through a <c>GenomeSequencer</c> and
        /// bundles it into a result object. This implementation does not extract alignment strings;
        /// both are reported as empty.
        /// </summary>
        /// <param name="sequenceA">the first sequence</param>
        /// <param name="sequenceB">the second sequence; its length may differ from that of the first</param>
        /// <param name="banded">true if the alignment should be band limited</param>
        /// <returns>a Result holding the computed cost and two empty alignment strings</returns>
        public ResultTable.Result Align_And_Extract(GeneSequence sequenceA, GeneSequence sequenceB, bool banded)
        {
            ResultTable.Result result = new ResultTable.Result();

            GenomeSequencer sequencer = new GenomeSequencer(sequenceA, sequenceB, MaxCharactersToAlign);
            int             cost      = sequencer.calculateSequenceCost(banded);

            // No alignment strings are produced here, so empty placeholders are stored.
            result.Update(cost, "", "");
            return result;
        }
Esempio n. 7
0
        /// <summary>
        /// Builds a cost matrix and a path matrix for two gene sequences, lets the banded or the
        /// unrestricted helper fill them, and returns the resulting score with both alignment strings
        /// (limited to 100 characters each).
        /// </summary>
        /// <param name="sequenceA">the first sequence (rows of the matrices)</param>
        /// <param name="sequenceB">the second sequence (columns of the matrices); its length may differ</param>
        /// <param name="banded">true if the alignment should be band limited</param>
        /// <returns>a Result holding the score taken from the last matrix cell and the two alignment strings</returns>
        public ResultTable.Result Align_And_Extract(GeneSequence sequenceA, GeneSequence sequenceB, bool banded)
        {
            ResultTable.Result result = new ResultTable.Result();

            string[] alignment = new string[2];
            alignment[0] = "";
            alignment[1] = "";

            // Dimension limit: the literal 15001 for banded runs, MaxCharactersToAlign for unrestricted runs.
            int limit;
            if (banded)
            {
                limit = 15001;
            }
            else
            {
                limit = MaxCharactersToAlign;
            }

            // Sequence length plus one, unless that exceeds the limit.
            int rows = sequenceA.Sequence.Length + 1 > limit ? limit : sequenceA.Sequence.Length + 1;
            int cols = sequenceB.Sequence.Length + 1 > limit ? limit : sequenceB.Sequence.Length + 1;

            int[,] matrix = new int[rows, cols];   // alignment costs
            int[,] prev   = new int[rows, cols];   // path information consumed by findAlignments
            initializeMatrices(matrix, prev, rows, cols);

            if (banded)
            {
                bandedAlg(matrix, prev, rows, cols, sequenceA, sequenceB);
            }
            else
            {
                unrestricted(matrix, prev, rows, cols, sequenceA, sequenceB);
            }

            int score = matrix[rows - 1, cols - 1];
            findAlignments(alignment, prev, rows, cols, score, sequenceA.Sequence, sequenceB.Sequence);

            // Trim each alignment string to its first 100 characters.
            alignment[0] = alignment[0].Length > 100 ? alignment[0].Substring(0, 100) : alignment[0];
            alignment[1] = alignment[1].Length > 100 ? alignment[1].Substring(0, 100) : alignment[1];

            result.Update(score, alignment[0], alignment[1]);
            return result;
        }
Esempio n. 8
0
        /// <summary>
        /// Aligns two gene sequences through an <c>Algo</c> instance and bundles its score and the
        /// row/column strings it produces into a result object.
        /// </summary>
        /// <param name="sequenceA">the first sequence</param>
        /// <param name="sequenceB">the second sequence; its length may differ from that of the first</param>
        /// <param name="banded">
        /// true if the alignment should be band limited. This implementation does not read the value;
        /// the Algo instance is always created with the literal arguments <c>false</c> and <c>5000</c>.
        /// </param>
        /// <returns>a Result holding the alignment score and the two alignment strings</returns>
        public ResultTable.Result Align_And_Extract(GeneSequence sequenceA, GeneSequence sequenceB, bool banded)
        {
            ResultTable.Result result = new ResultTable.Result();

            // NOTE(review): `banded` is ignored and the last two arguments are hard-coded — confirm
            // against the Algo constructor whether they should follow the caller's settings.
            Algo engine = new Algo(sequenceA, sequenceB, false, 5000);

            engine.RunAlgo();
            engine.CalcStrings();

            int    cost      = engine.GetScore();
            string rowString = engine.GetRowString();
            string colString = engine.GetColString();

            result.Update(cost, rowString, colString);
            return result;
        }
Esempio n. 9
0
        /// <summary>
        /// Cuts both gene sequences down to at most MaxCharactersToAlign characters, runs a
        /// <c>DynamicProgramming</c> instance on them and bundles its score and result strings into a
        /// result object.
        /// </summary>
        /// <param name="cell">value forwarded unchanged to the DynamicProgramming constructor</param>
        /// <param name="sequenceA">the first sequence</param>
        /// <param name="sequenceB">the second sequence; its length may differ from that of the first</param>
        /// <param name="banded">true if the alignment should be band limited</param>
        /// <returns>a Result holding the alignment score and the two alignment strings</returns>
        public ResultTable.Result Align_And_Extract(Tuple <int, int> cell, GeneSequence sequenceA, GeneSequence sequenceB, bool banded)
        {
            ResultTable.Result result = new ResultTable.Result();

            // Number of leading characters taken from each sequence.
            int countA = MaxCharactersToAlign;
            if (sequenceA.Sequence.Length < countA)
            {
                countA = sequenceA.Sequence.Length;
            }
            int countB = MaxCharactersToAlign;
            if (sequenceB.Sequence.Length < countB)
            {
                countB = sequenceB.Sequence.Length;
            }

            string headA = sequenceA.Sequence.Substring(0, countA);
            string headB = sequenceB.Sequence.Substring(0, countB);

            DynamicProgramming aligner = new DynamicProgramming(cell, headA, headB, banded);

            int      score     = aligner.getScore();
            string[] alignment = { aligner.getResultA(), aligner.getResultB() };

            result.Update(score, alignment[0], alignment[1]);
            return result;
        }
Esempio n. 10
0
        /// <summary>
        /// Aligns two gene sequences through a <c>MyAligner</c> instance and bundles its cost and the
        /// two aligned strings into a result object.
        /// </summary>
        /// <param name="sequenceA">the first sequence</param>
        /// <param name="sequenceB">the second sequence; its length may differ from that of the first</param>
        /// <param name="banded">true if the alignment should be band limited; forwarded to MyAligner</param>
        /// <returns>a Result holding the alignment cost and the two aligned strings</returns>
        public ResultTable.Result Align_And_Extract(GeneSequence sequenceA, GeneSequence sequenceB, bool banded)
        {
            ResultTable.Result result = new ResultTable.Result();

            MyAligner worker = new MyAligner(sequenceA.Sequence, sequenceB.Sequence, banded, MaxCharactersToAlign);
            worker.ExecuteAlignment();

            // Read the outputs in the same order they are stored: cost, then sequence A, then sequence B.
            int    cost     = worker.GetCost();
            string alignedA = worker.GetAlignedSequenceA();
            string alignedB = worker.GetAlignedSequenceB();

            result.Update(cost, alignedA, alignedB);
            return result;
        }
Esempio n. 11
0
        /// <summary>
        /// Computes a minimum-cost global alignment of the leading characters of two gene sequences
        /// with a dynamic-programming table (match -3, substitution +1, insertion/deletion +5) and
        /// returns the score together with the first 100 characters of each aligned string.
        /// </summary>
        /// <param name="sequenceA">the first sequence (rows of the table)</param>
        /// <param name="sequenceB">the second sequence (columns of the table); its length may differ</param>
        /// <param name="banded">
        /// true to evaluate only cells whose column is within 3 of their row; false to fill the whole table
        /// </param>
        /// <returns>
        /// A Result holding the alignment score and both aligned strings. In banded mode, when the
        /// truncated lengths differ by more than 3 the bottom-right cell lies outside the band, so the
        /// Result holds int.MaxValue and "No Alignment Possible" for both strings.
        /// </returns>
        public ResultTable.Result Align_And_Extract(GeneSequence sequenceA, GeneSequence sequenceB, bool banded)
        {
            const int    IndelCost        = 5;
            const int    SubstitutionCost = 1;
            const int    MatchCost        = -3;
            const int    BandDistance     = 3;    // cells with |row - column| > 3 are skipped when banded
            const int    MaxDisplayLength = 100;
            const string NoAlignment      = "No Alignment Possible";

            ResultTable.Result result = new ResultTable.Result();

            // Crop each sequence to MaxCharactersToAlign and prefix it with '-' so that index 0 of each
            // word stands for "no characters consumed yet" (row 0 / column 0 of the table).
            string fullA = sequenceA.Sequence;
            string fullB = sequenceB.Sequence;
            string word1 = "-" + fullA.Substring(0, Math.Min(fullA.Length, MaxCharactersToAlign));
            string word2 = "-" + fullB.Substring(0, Math.Min(fullB.Length, MaxCharactersToAlign));
            int    rows  = word1.Length;
            int    cols  = word2.Length;

            // The band can only reach the bottom-right cell when the lengths differ by at most
            // BandDistance. Decide this from the lengths (in either direction) instead of inferring
            // it from a score of 0, which is also a legitimate alignment score.
            if (banded && Math.Abs(rows - cols) > BandDistance)
            {
                result.Update(int.MaxValue, NoAlignment, NoAlignment);
                return result;
            }

            int[,]       cost = new int[rows, cols];       // cheapest cost to align word1[1..x] with word2[1..y]
            Direction[,] back = new Direction[rows, cols]; // which neighbour produced that cost
            cost[0, 0] = 0;
            back[0, 0] = Direction.Finish;

            for (int x = 0; x < rows; x++)
            {
                int firstCol = banded ? Math.Max(0, x - BandDistance) : 0;
                int lastCol  = banded ? Math.Min(cols - 1, x + BandDistance) : cols - 1;

                for (int y = firstCol; y <= lastCol; y++)
                {
                    if (x == 0 && y == 0)
                    {
                        continue; // origin is already initialised
                    }

                    int       best = int.MaxValue;
                    Direction dir  = Direction.Finish;

                    // Left neighbour (x, y - 1): only usable when it was itself computed, i.e. it is
                    // not to the left of this row's first evaluated column.
                    if (y > firstCol)
                    {
                        best = cost[x, y - 1] + IndelCost;
                        dir  = Direction.Left;
                    }

                    // Upper neighbour (x - 1, y): in banded mode the previous row only reaches column
                    // (x - 1) + BandDistance, so anything further right was never computed.
                    if (x > 0 && (!banded || y <= x - 1 + BandDistance))
                    {
                        int up = cost[x - 1, y] + IndelCost;
                        if (up <= best)
                        {
                            best = up;
                            dir  = Direction.Up;
                        }
                    }

                    // Diagonal neighbour (x - 1, y - 1) has the same row/column offset as (x, y), so it
                    // is inside the band whenever this cell is. Ties favour the diagonal.
                    if (x > 0 && y > 0)
                    {
                        int diagonal = cost[x - 1, y - 1] + (word1[x] == word2[y] ? MatchCost : SubstitutionCost);
                        if (diagonal <= best)
                        {
                            best = diagonal;
                            dir  = Direction.Diagonal;
                        }
                    }

                    cost[x, y] = best;
                    back[x, y] = dir;
                }
            }

            int score = cost[rows - 1, cols - 1];

            // Walk the back-pointers from the bottom-right cell to the origin. Characters are appended
            // in reverse order and flipped afterwards. Every pointer leads to a computed cell, so the
            // walk stays inside the band and ends at the origin.
            StringBuilder reversedA = new StringBuilder();
            StringBuilder reversedB = new StringBuilder();
            int i = rows - 1;
            int j = cols - 1;

            for (Direction step = back[i, j]; step != Direction.Finish; step = back[i, j])
            {
                switch (step)
                {
                case Direction.Left:
                    reversedA.Append('-');
                    reversedB.Append(word2[j]);
                    j--;
                    break;

                case Direction.Up:
                    reversedA.Append(word1[i]);
                    reversedB.Append('-');
                    i--;
                    break;

                case Direction.Diagonal:
                    reversedA.Append(word1[i]);
                    reversedB.Append(word2[j]);
                    i--;
                    j--;
                    break;
                }
            }

            result.Update(score,
                          ReverseAndTruncate(reversedA, MaxDisplayLength),
                          ReverseAndTruncate(reversedB, MaxDisplayLength));
            return result;
        }

        /// <summary>
        /// Returns the characters of <paramref name="reversed"/> in opposite order, keeping at most
        /// <paramref name="maxLength"/> characters from the start of the flipped text.
        /// </summary>
        private static string ReverseAndTruncate(StringBuilder reversed, int maxLength)
        {
            char[] chars = reversed.ToString().ToCharArray();
            Array.Reverse(chars);
            return new string(chars, 0, Math.Min(chars.Length, maxLength));
        }
Esempio n. 12
0
        /// <summary>
        /// Aligns the leading characters (at most <c>MaxCharactersToAlign</c> of each) of two gene sequences
        /// with a dynamic-programming cost table and reports the resulting cost and the two aligned strings.
        /// </summary>
        /// <param name="sequenceA">the first sequence</param>
        /// <param name="sequenceB">the second sequence, may have length not equal to the length of the first seq.</param>
        /// <param name="banded">true if alignment should be band limited.</param>
        /// <returns>
        /// the alignment score and the alignment (in a Result object) for sequenceA and sequenceB.
        /// In banded mode, when the clipped lengths differ by more than the band radius, the score is
        /// <c>int.MaxValue</c> and both alignment strings read "No Alignment Possible".
        /// The calling function places the result in the display appropriately.
        /// </returns>
        public ResultTable.Result Align_And_Extract(GeneSequence sequenceA, GeneSequence sequenceB, bool banded)
        {
            // Furthest a cell may sit from the main diagonal when banding is on (7 cells per row).
            const int bandRadius = 3;

            ResultTable.Result result = new ResultTable.Result();

            char[] a = sequenceA.Sequence.ToCharArray();
            char[] b = sequenceB.Sequence.ToCharArray();

            // Number of characters of each sequence that actually take part in the alignment.
            int i_loop = Math.Min(a.Length, MaxCharactersToAlign);
            int j_loop = Math.Min(b.Length, MaxCharactersToAlign);

            // Shortcut: identical prefixes align purely along the diagonal.
            string prefixA = sequenceA.Sequence.Substring(0, i_loop);
            string prefixB = sequenceB.Sequence.Substring(0, j_loop);
            if (prefixA.Equals(prefixB))
            {
                // Score by the number of characters really aligned. Multiplying by MaxCharactersToAlign
                // over-reports the score whenever the sequence is shorter than the cap.
                // NOTE(review): -3 is the per-match value hard-coded by this shortcut; presumably it is
                // what matcher() returns for equal characters - confirm.
                result.Update(i_loop * -3, prefixA, prefixB);
                return(result);
            }

            // A band of this radius can never reach the bottom-right cell if the lengths differ by more.
            if (banded && Math.Abs(i_loop - j_loop) > bandRadius)
            {
                result.Update(int.MaxValue, "No Alignment Possible", "No Alignment Possible");
                return(result);
            }

            // Cost table and back-pointer table. Both are allocated at full (n+1) x (m+1) size even in
            // banded mode, so space stays O(nm); banding only limits which cells are filled in.
            int[,]  e        = new int[i_loop + 1, j_loop + 1];
            char[,] previous = new char[i_loop + 1, j_loop + 1];

            // First column: reaching row i from the origin costs i indels, each pointing 'up'.
            // In banded mode only the cells inside the band are seeded.
            int lastSeedRow = banded ? Math.Min(i_loop, bandRadius) : i_loop;
            for (int i = 0; i <= lastSeedRow; i++)
            {
                e[i, 0]        = i * indel_val;
                previous[i, 0] = up;
            }

            // First row: reaching column j from the origin costs j indels, each pointing 'left'.
            int lastSeedCol = banded ? Math.Min(j_loop, bandRadius) : j_loop;
            for (int j = 1; j <= lastSeedCol; j++)
            {
                e[0, j]        = j * indel_val;
                previous[0, j] = left;
            }

            // Fill the table row by row. Each cell takes the cheapest of its upper, left and diagonal
            // neighbours; ties prefer diagonal, then up, then left.
            // Unbanded: every cell is visited, O(nm) time. Banded: at most 2 * bandRadius + 1 cells per row.
            for (int i = 1; i <= i_loop; i++)
            {
                int firstCol = banded ? Math.Max(i - bandRadius, 1) : 1;
                int lastCol  = banded ? Math.Min(j_loop, i + bandRadius) : j_loop;

                for (int j = firstCol; j <= lastCol; j++)
                {
                    // A neighbour outside the band was never computed, so it must not be read;
                    // int.MaxValue keeps it from ever winning the minimum (the diagonal is always finite).
                    int costUp   = int.MaxValue;
                    int costLeft = int.MaxValue;
                    if (!banded || j != i + bandRadius)
                    {
                        costUp = e[i - 1, j] + indel_val;
                    }
                    if (!banded || j != i - bandRadius)
                    {
                        costLeft = e[i, j - 1] + indel_val;
                    }
                    int costDiag = e[i - 1, j - 1] + matcher(a[i - 1], b[j - 1]);
                    int cheapest = Math.Min(Math.Min(costUp, costLeft), costDiag);

                    e[i, j] = cheapest;

                    if (costDiag == cheapest)
                    {
                        previous[i, j] = diag;
                    }
                    else if (costUp == cheapest)
                    {
                        previous[i, j] = up;
                    }
                    else
                    {
                        previous[i, j] = left;
                    }
                }
            }

            // The bottom-right cell holds the total cost; the back-pointers rebuild the aligned strings.
            int score = e[i_loop, j_loop];
            string[] alignment = makeAligments(i_loop, j_loop, previous, a, b);

            result.Update(score, alignment[0], alignment[1]);
            return(result);
        }
Esempio n. 13
0
        /// <summary>
        /// Aligns the leading characters (at most <c>MaxCharactersToAlign</c> of each) of two gene sequences.
        /// Builds a table of <c>node</c> entries (score plus parent coordinates), then walks the parent links
        /// back from the bottom-right cell to produce the two aligned strings, using "-" for a gap.
        /// Costs: indel 5, substitution 1, match -3.
        /// </summary>
        /// <param name="sequenceA">the first sequence</param>
        /// <param name="sequenceB">the second sequence, may have length not equal to the length of the first seq.</param>
        /// <param name="banded">true if alignment should be band limited.</param>
        /// <returns>
        /// the alignment score and the alignment (in a Result object) for sequenceA and sequenceB.
        /// In banded mode, when the clipped lengths differ by more than 3, the score is <c>int.MaxValue</c>
        /// and both alignment strings read "No Alignment Possible".
        /// The calling function places the result in the display appropriately.
        /// </returns>
        public ResultTable.Result Align_And_Extract(GeneSequence sequenceA, GeneSequence sequenceB, bool banded)
        {
            const int indel = 5;
            const int sub   = 1;
            const int match = -3;

            // Furthest a cell may sit from the main diagonal when banding is on (7 cells per row).
            const int bandRadius = 3;

            // ---------------------------------------------------------------------------------------------
            // Setup. O(1) apart from allocating the table.
            int maxlength = MaxCharactersToAlign;
            int lengthA   = Math.Min(sequenceA.Sequence.Length, maxlength);
            int lengthB   = Math.Min(sequenceB.Sequence.Length, maxlength);

            ResultTable.Result result = new ResultTable.Result();

            // A band of radius 3 cannot reach the bottom-right cell if the lengths differ by more than 3.
            if (banded && Math.Abs(lengthA - lengthB) > bandRadius)
            {
                result.Update(int.MaxValue, "No Alignment Possible", "No Alignment Possible");
                return(result);
            }

            // One extra row/column for the "empty string" prefix. Cells never computed stay null.
            node[,] calcTable = new node[lengthA + 1, lengthB + 1];

            // node arguments are presumably (parent_x, parent_y, score), matching how the fields are
            // read back during the traceback below. The origin has no parent.
            calcTable[0, 0] = new node(-1, -1, 0);

            // ---------------------------------------------------------------------------------------------
            // Seed the b == 0 and a == 0 edges with pure indel chains.
            // In banded mode only the first 3 cells of each edge are inside the band. The bound is clamped
            // to the table size so sequences shorter than 3 characters no longer index past the array.
            int seededA = banded ? Math.Min(bandRadius, lengthA) : lengthA;
            for (int a = 1; a <= seededA; a++)
            {
                calcTable[a, 0] = new node(a - 1, 0, a * indel);
            }

            int seededB = banded ? Math.Min(bandRadius, lengthB) : lengthB;
            for (int b = 1; b <= seededB; b++)
            {
                calcTable[0, b] = new node(0, b - 1, b * indel);
            }

            // ---------------------------------------------------------------------------------------------
            // Fill the table. Unbanded: O(n*m) cells. Banded: at most 7 cells per value of a, so O(n).
            for (int a = 1; a <= lengthA; a++)
            {
                int firstB = banded ? Math.Max(1, a - bandRadius) : 1;
                int lastB  = banded ? Math.Min(lengthB, a + bandRadius) : lengthB;

                for (int b = firstB; b <= lastB; b++)
                {
                    // A null neighbour lies outside the band; int.MaxValue keeps it from being chosen.
                    // (In unbanded mode every neighbour has already been computed.)
                    node viaTop  = calcTable[a, b - 1];
                    int  topCost = viaTop == null ? int.MaxValue : viaTop.score + indel;

                    node viaLeft  = calcTable[a - 1, b];
                    int  leftCost = viaLeft == null ? int.MaxValue : viaLeft.score + indel;

                    // The diagonal neighbour is always inside the band (or on a seeded edge), never null.
                    int step     = sequenceA.Sequence[a - 1] == sequenceB.Sequence[b - 1] ? match : sub;
                    int diagCost = calcTable[a - 1, b - 1].score + step;

                    // Ties prefer the diagonal, then (a - 1, b), then (a, b - 1).
                    if (diagCost <= leftCost && diagCost <= topCost)
                    {
                        calcTable[a, b] = new node(a - 1, b - 1, diagCost);
                    }
                    else if (leftCost <= diagCost && leftCost <= topCost)
                    {
                        calcTable[a, b] = new node(a - 1, b, leftCost);
                    }
                    else
                    {
                        calcTable[a, b] = new node(a, b - 1, topCost);
                    }
                }
            }

            // ---------------------------------------------------------------------------------------------
            // Traceback from the bottom-right cell to the origin.
            // Every step consumes at least one character of A or B, so the aligned strings are at most
            // lengthA + lengthB long. The buffers are filled from the back, which yields the strings in
            // forward order in linear time (inserting at the front of a StringBuilder is quadratic).
            int capacity = lengthA + lengthB;
            char[] alignedA = new char[capacity];
            char[] alignedB = new char[capacity];
            int write = capacity;               // index of the most recently written slot

            int row = lengthA;
            int col = lengthB;

            // Interior: follow the stored parent of each cell until an edge is reached.
            while (row != 0 && col != 0)
            {
                node cell     = calcTable[row, col];
                int parentRow = cell.parent_x;
                int parentCol = cell.parent_y;

                write--;
                if (parentRow < row)
                {
                    // A character of A was consumed; B's character too if the move was diagonal.
                    alignedA[write] = sequenceA.Sequence[row - 1];
                    alignedB[write] = parentCol < col ? sequenceB.Sequence[col - 1] : '-';
                }
                else
                {
                    // Only B advanced: gap in A.
                    alignedA[write] = '-';
                    alignedB[write] = sequenceB.Sequence[col - 1];
                }

                row = parentRow;
                col = parentCol;
            }

            // row == 0 edge: all of A is used up, the rest of B pairs with gaps.
            while (col != 0)
            {
                write--;
                alignedA[write] = '-';
                alignedB[write] = sequenceB.Sequence[col - 1];
                col = calcTable[0, col].parent_y;
            }

            // col == 0 edge: all of B is used up, the rest of A pairs with gaps.
            while (row != 0)
            {
                write--;
                alignedA[write] = sequenceA.Sequence[row - 1];
                alignedB[write] = '-';
                row = calcTable[row, 0].parent_x;
            }

            // ---------------------------------------------------------------------------------------------
            // Bundle the final score and the two aligned strings.
            int score    = calcTable[lengthA, lengthB].score;
            string textA = new string(alignedA, write, capacity - write);
            string textB = new string(alignedB, write, capacity - write);

            result.Update(score, textA, textB);
            return(result);
        }
Esempio n. 14
0
        /////////////////////////////////////////////////////////////////////////////////////////////////////////////////
        /////////////////////////////////////////////////////////////////////////////////////////////////////////////////
        /////////////////////////////////////////////////////////////////////////////////////////////////////////////////
        /// <summary>
        /// Entry point for aligning two sequences: hands the work to either the unrestricted or the
        /// banded algorithm and wraps whatever score and alignment strings they leave behind.
        /// </summary>
        /// <param name="sequenceA">the first sequence</param>
        /// <param name="sequenceB">the second sequence, may have length not equal to the length of the first seq.</param>
        /// <param name="banded">true if alignment should be band limited.</param>
        /// <returns>the alignment score and the alignment (in a Result object) for sequenceA and sequenceB.  The calling function places the result in the display appropriately.</returns>
        public ResultTable.Result Align_And_Extract(GeneSequence sequenceA, GeneSequence sequenceB, bool banded)
        {
            ResultTable.Result outcome = new ResultTable.Result();

            // Defaults handed to the algorithm by reference: score 0 and two empty alignment strings.
            int totalCost = 0;
            string[] aligned = new string[] { "", "" };

            if (banded)
            {
                bandedAlgorithm(ref totalCost, ref aligned, ref sequenceA, ref sequenceB);
            }
            else
            {
                unrestrictedAlgorithm(ref totalCost, ref aligned, ref sequenceA, ref sequenceB);
            }

            // Bundle the results into the object type the caller expects.
            outcome.Update(totalCost, aligned[0], aligned[1]);
            return outcome;
        }
Esempio n. 15
0
 // Computes and displays one result per sequence pair (x, y) with y >= x, i.e. the diagonal and the
 // half of the table on one side of it. Diagonal cells are filled directly; all others go through
 // PairWiseAlign.Align_And_Extract.
 private void fillMatrix()
 {
     // The aligner for off-diagonal cells is configured from the number typed into bandlengthBox.
     PairWiseAlign processor = new PairWiseAlign(int.Parse(bandlengthBox.Text));

     for (int row = 0; row < NUMBER_OF_SEQUENCES; ++row)
     {
         for (int col = row; col < NUMBER_OF_SEQUENCES; ++col)
         {
             ResultTable.Result cell;

             if (row != col)
             {
                 // ********* hook to the student's code
                 cell = processor.Align_And_Extract(m_sequences[row], m_sequences[col], bandCheckBox.Checked);
             }
             else
             {
                 // A sequence compared with itself matches everywhere: score is -3 per aligned character
                 // and both alignment strings are the (clipped) sequence itself.
                 cell = new ResultTable.Result();

                 // NOTE(review): the length cap is read from a fresh default-constructed PairWiseAlign,
                 // not from 'processor' - confirm both report the same limit.
                 PairWiseAlign limits = new PairWiseAlign();

                 string wholeSequence = m_sequences[row].Sequence;
                 int keptLength = Math.Min(wholeSequence.Length, limits.getMaxhCharactersToAlign());
                 string clipped = wholeSequence.Substring(0, keptLength);

                 cell.Update(-3 * keptLength, clipped, clipped);
             }

             m_resultTable.AddResult(row, col, cell);
             m_resultTable.DisplayResult(row, col);
         }
     }
 }