/////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary>
/// Aligns two gene sequences by delegating to either the banded or the unrestricted
/// solver, then packages the outcome into a Result object.
/// </summary>
/// <param name="sequenceA">the first sequence</param>
/// <param name="sequenceB">the second sequence; its length may differ from that of the first.</param>
/// <param name="banded">true if alignment should be band limited.</param>
/// <returns>a Result carrying the alignment score and the two alignment strings; the caller is responsible for displaying it.</returns>
public ResultTable.Result Align_And_Extract(GeneSequence sequenceA, GeneSequence sequenceB, bool banded)
{
    ResultTable.Result result = new ResultTable.Result();

    // Starting values; both solvers receive these by reference.
    int score = 0;
    string[] alignment = new string[2];
    alignment[0] = "";
    alignment[1] = "";

    if (banded)
    {
        bandedAlgorithm(ref score, ref alignment, ref sequenceA, ref sequenceB);
    }
    else
    {
        unrestrictedAlgorithm(ref score, ref alignment, ref sequenceA, ref sequenceB);
    }

    result.Update(score, alignment[0], alignment[1]);
    return result;
}
/// <summary>
/// Computes a result for every (x, y) pair with y &gt;= x, stores it in the result table
/// and asks the table to display it. Diagonal cells are filled directly with a
/// sequence-versus-itself result; every other cell is delegated to the aligner.
/// </summary>
private void fillMatrix()
{
    int alignLen = int.Parse(bandlengthBox.Text);
    ResultTable.Result result;
    PairWiseAlign processor = new PairWiseAlign(alignLen);

    for (int row = 0; row < NUMBER_OF_SEQUENCES; ++row)
    {
        for (int col = row; col < NUMBER_OF_SEQUENCES; ++col)
        {
            if (row != col)
            {
                // Two different sequences: run the real alignment.
                result = processor.Align_And_Extract(m_sequences[row], m_sequences[col], bandCheckBox.Checked);
            }
            else
            {
                // A sequence against itself: every compared character matches, so the
                // score is -3 per character and both alignment strings are the prefix itself.
                result = new ResultTable.Result();
                PairWiseAlign align = new PairWiseAlign();
                int prefixLength = Math.Min(m_sequences[row].Sequence.Length, align.getMaxhCharactersToAlign());
                int selfScore = -3 * prefixLength;
                string prefix = m_sequences[row].Sequence.Substring(0, prefixLength);
                result.Update(selfScore, prefix, prefix);
            }

            m_resultTable.AddResult(row, col, result);
            m_resultTable.DisplayResult(row, col);
        }
    }
}
/// <summary>
/// Aligns the leading portions (at most MaxCharactersToAlign characters each) of two
/// gene sequences using an EditDistance solver, in banded or unbanded mode.
/// </summary>
/// <param name="sequenceA">the first sequence</param>
/// <param name="sequenceB">the second sequence; its length may differ from that of the first.</param>
/// <param name="banded">true if alignment should be band limited.</param>
/// <returns>
/// a Result carrying the alignment score and the two alignment strings. In banded mode,
/// when the capped lengths differ by more than Bandwidth, the score is int.MaxValue and
/// both strings read "No Alignment Possible".
/// </returns>
public ResultTable.Result Align_And_Extract(GeneSequence sequenceA, GeneSequence sequenceB, bool banded)
{
    ResultTable.Result result = new ResultTable.Result();

    // Never align more than MaxCharactersToAlign characters of either sequence.
    int lengthA = Math.Min(MaxCharactersToAlign, sequenceA.Sequence.Length);
    int lengthB = Math.Min(MaxCharactersToAlign, sequenceB.Sequence.Length);

    // Banded mode gives up immediately when the length gap exceeds the bandwidth.
    if (banded && Math.Abs(lengthB - lengthA) > Bandwidth)
    {
        result.Update(int.MaxValue, "No Alignment Possible", "No Alignment Possible");
        return result;
    }

    EditDistance editor = new EditDistance(
        sequenceA.Sequence.Substring(0, lengthA),
        sequenceB.Sequence.Substring(0, lengthB));

    string[] alignment;
    if (banded)
    {
        editor.setupBanded();
        alignment = editor.bandedResults();
    }
    else
    {
        editor.setupUnbanded();
        alignment = editor.results();
    }

    // The score is read only after the alignment strings have been extracted.
    int score = editor.value();

    result.Update(score, alignment[0], alignment[1]);
    return result;
}
/// <summary>
/// Aligns two gene sequences with a cost matrix plus a back-pointer matrix, using the
/// banded or the unrestricted fill routine, and trims the alignment strings for display.
/// </summary>
/// <param name="sequenceA">the first sequence</param>
/// <param name="sequenceB">the second sequence; its length may differ from that of the first.</param>
/// <param name="banded">true if alignment should be band limited.</param>
/// <returns>a Result carrying the alignment score and the two alignment strings (each at most 100 characters); the caller is responsible for displaying it.</returns>
public ResultTable.Result Align_And_Extract(GeneSequence sequenceA, GeneSequence sequenceB, bool banded)
{
    ResultTable.Result result = new ResultTable.Result();
    string[] alignment = new string[2];
    alignment[0] = alignment[1] = "";

    // Upper bound on each matrix dimension.
    // NOTE(review): the unbanded bound is MaxCharactersToAlign itself, while the banded
    // bound (15001) looks like "15000 + 1"; confirm the unbanded bound is not meant to
    // be MaxCharactersToAlign + 1.
    int dimensionCap = banded ? 15001 : MaxCharactersToAlign;

    // One row/column per character plus one extra, clamped to the cap.
    int rows = sequenceA.Sequence.Length + 1;
    if (dimensionCap < rows)
    {
        rows = dimensionCap;
    }

    int cols = sequenceB.Sequence.Length + 1;
    if (dimensionCap < cols)
    {
        cols = dimensionCap;
    }

    // Cost matrix and the matrix used to track the path.
    int[,] matrix = new int[rows, cols];
    int[,] prev = new int[rows, cols];
    initializeMatrices(matrix, prev, rows, cols);

    if (banded)
    {
        bandedAlg(matrix, prev, rows, cols, sequenceA, sequenceB);
    }
    else
    {
        unrestricted(matrix, prev, rows, cols, sequenceA, sequenceB);
    }

    // The score is read from the last cell of the cost matrix.
    int score = matrix[rows - 1, cols - 1];

    // Rebuild the alignment strings from the path stored in prev.
    findAlignments(alignment, prev, rows, cols, score, sequenceA.Sequence, sequenceB.Sequence);

    // Only the first 100 characters of each alignment are kept for display.
    for (int k = 0; k < 2; k++)
    {
        if (alignment[k].Length > 100)
        {
            alignment[k] = alignment[k].Substring(0, 100);
        }
    }

    result.Update(score, alignment[0], alignment[1]);
    return result;
}
/// <summary>
/// Aligns the leading portions (at most MaxCharactersToAlign characters each) of two
/// gene sequences by handing them to a DynamicProgramming solver.
/// </summary>
/// <param name="cell">passed through unchanged to the DynamicProgramming constructor (presumably the result-table cell being filled; confirm against that class).</param>
/// <param name="sequenceA">the first sequence</param>
/// <param name="sequenceB">the second sequence; its length may differ from that of the first.</param>
/// <param name="banded">true if alignment should be band limited.</param>
/// <returns>a Result carrying the alignment score and the two alignment strings; the caller is responsible for displaying it.</returns>
public ResultTable.Result Align_And_Extract(Tuple<int, int> cell, GeneSequence sequenceA, GeneSequence sequenceB, bool banded)
{
    ResultTable.Result result = new ResultTable.Result();

    // Crop each sequence to at most MaxCharactersToAlign characters.
    int lengthA = Math.Min(sequenceA.Sequence.Length, MaxCharactersToAlign);
    string prefixA = sequenceA.Sequence.Substring(0, lengthA);

    int lengthB = Math.Min(sequenceB.Sequence.Length, MaxCharactersToAlign);
    string prefixB = sequenceB.Sequence.Substring(0, lengthB);

    DynamicProgramming dp = new DynamicProgramming(cell, prefixA, prefixB, banded);

    int score = dp.getScore();
    string alignedA = dp.getResultA();
    string alignedB = dp.getResultB();

    result.Update(score, alignedA, alignedB);
    return result;
}
/// <summary>
/// Computes the alignment cost of two gene sequences through a GenomeSequencer.
/// Only the score is produced; both alignment strings are reported as empty.
/// </summary>
/// <param name="sequenceA">the first sequence</param>
/// <param name="sequenceB">the second sequence; its length may differ from that of the first.</param>
/// <param name="banded">true if alignment should be band limited.</param>
/// <returns>a Result carrying the alignment score and two empty alignment strings; the caller is responsible for displaying it.</returns>
public ResultTable.Result Align_And_Extract(GeneSequence sequenceA, GeneSequence sequenceB, bool banded)
{
    ResultTable.Result result = new ResultTable.Result();

    GenomeSequencer genomeSequencer = new GenomeSequencer(sequenceA, sequenceB, MaxCharactersToAlign);
    int score = genomeSequencer.calculateSequenceCost(banded);

    // No alignment strings are extracted in this implementation.
    string[] alignment = { "", "" };

    result.Update(score, alignment[0], alignment[1]);
    return result;
}
/// <summary>
/// Aligns two gene sequences: sizes and initialises the cost and back-pointer matrices,
/// fills them with the banded or unrestricted routine, then extracts the score and the
/// alignment strings, keeping at most 100 characters of each string.
/// </summary>
/// <param name="sequenceA">the first sequence</param>
/// <param name="sequenceB">the second sequence; its length may differ from that of the first.</param>
/// <param name="banded">true if alignment should be band limited.</param>
/// <returns>a Result carrying the alignment score and the two (possibly truncated) alignment strings; the caller is responsible for displaying it.</returns>
public ResultTable.Result Align_And_Extract(GeneSequence sequenceA, GeneSequence sequenceB, bool banded)
{
    ResultTable.Result result = new ResultTable.Result();
    string[] alignment = new string[2];
    alignment[0] = alignment[1] = "";

    // Largest allowed matrix dimension: fixed at 15001 when banded, otherwise MaxCharactersToAlign.
    int limit;
    if (banded)
    {
        limit = 15001;
    }
    else
    {
        limit = MaxCharactersToAlign;
    }

    // Each dimension is (sequence length + 1) unless that exceeds the limit.
    int wantedRows = sequenceA.Sequence.Length + 1;
    int rows = limit < wantedRows ? limit : wantedRows;
    int wantedCols = sequenceB.Sequence.Length + 1;
    int cols = limit < wantedCols ? limit : wantedCols;

    int[,] matrix = new int[rows, cols];
    int[,] prev = new int[rows, cols];
    initializeMatrices(matrix, prev, rows, cols);

    if (banded)
    {
        bandedAlg(matrix, prev, rows, cols, sequenceA, sequenceB);
    }
    else
    {
        unrestricted(matrix, prev, rows, cols, sequenceA, sequenceB);
    }

    // The bottom-right cell of the cost matrix is taken as the score.
    int score = matrix[rows - 1, cols - 1];
    findAlignments(alignment, prev, rows, cols, score, sequenceA.Sequence, sequenceB.Sequence);

    // Trim over-long alignment strings down to their first 100 characters.
    string shownA = alignment[0];
    if (shownA.Length > 100)
    {
        shownA = shownA.Substring(0, 100);
    }
    alignment[0] = shownA;

    string shownB = alignment[1];
    if (shownB.Length > 100)
    {
        shownB = shownB.Substring(0, 100);
    }
    alignment[1] = shownB;

    result.Update(score, alignment[0], alignment[1]);
    return result;
}
/// <summary>
/// Aligns two gene sequences by running an Algo instance and collecting its score and
/// its row/column alignment strings.
/// </summary>
/// <param name="sequenceA">the first sequence</param>
/// <param name="sequenceB">the second sequence; its length may differ from that of the first.</param>
/// <param name="banded">true if alignment should be band limited. NOTE(review): this value is never read here; Algo is always constructed with the literal false. Confirm whether that argument is meant to carry the banded flag.</param>
/// <returns>a Result carrying the alignment score and the two alignment strings; the caller is responsible for displaying it.</returns>
public ResultTable.Result Align_And_Extract(GeneSequence sequenceA, GeneSequence sequenceB, bool banded)
{
    ResultTable.Result result = new ResultTable.Result();

    // NOTE(review): 5000 is hard-coded; presumably the character limit. Verify against Algo.
    Algo algo = new Algo(sequenceA, sequenceB, false, 5000);
    algo.RunAlgo();
    algo.CalcStrings();

    int score = algo.GetScore();
    string rowAlignment = algo.GetRowString();
    string colAlignment = algo.GetColString();

    result.Update(score, rowAlignment, colAlignment);
    return result;
}
/// <summary>
/// Crops both gene sequences to at most MaxCharactersToAlign characters and aligns the
/// cropped strings with a DynamicProgramming solver.
/// </summary>
/// <param name="cell">forwarded as-is to the DynamicProgramming constructor (its meaning is defined by that class; presumably the table cell being computed, to be confirmed).</param>
/// <param name="sequenceA">the first sequence</param>
/// <param name="sequenceB">the second sequence; its length may differ from that of the first.</param>
/// <param name="banded">true if alignment should be band limited.</param>
/// <returns>a Result carrying the alignment score and the two alignment strings; the caller is responsible for displaying it.</returns>
public ResultTable.Result Align_And_Extract(Tuple<int, int> cell, GeneSequence sequenceA, GeneSequence sequenceB, bool banded)
{
    ResultTable.Result result = new ResultTable.Result();

    // Take the whole sequence when it is shorter than the limit, otherwise only the limit.
    string croppedA;
    if (sequenceA.Sequence.Length < MaxCharactersToAlign)
    {
        croppedA = sequenceA.Sequence.Substring(0, sequenceA.Sequence.Length);
    }
    else
    {
        croppedA = sequenceA.Sequence.Substring(0, MaxCharactersToAlign);
    }

    string croppedB;
    if (sequenceB.Sequence.Length < MaxCharactersToAlign)
    {
        croppedB = sequenceB.Sequence.Substring(0, sequenceB.Sequence.Length);
    }
    else
    {
        croppedB = sequenceB.Sequence.Substring(0, MaxCharactersToAlign);
    }

    DynamicProgramming solver = new DynamicProgramming(cell, croppedA, croppedB, banded);

    string[] alignment = new string[2];
    int score = solver.getScore();
    alignment[0] = solver.getResultA();
    alignment[1] = solver.getResultB();

    result.Update(score, alignment[0], alignment[1]);
    return result;
}
/// <summary>
/// Aligns two gene sequences by running a MyAligner over their raw strings and
/// collecting its cost and aligned strings.
/// </summary>
/// <param name="sequenceA">the first sequence</param>
/// <param name="sequenceB">the second sequence; its length may differ from that of the first.</param>
/// <param name="banded">true if alignment should be band limited.</param>
/// <returns>a Result carrying the alignment score and the two alignment strings; the caller is responsible for displaying it.</returns>
public ResultTable.Result Align_And_Extract(GeneSequence sequenceA, GeneSequence sequenceB, bool banded)
{
    ResultTable.Result result = new ResultTable.Result();

    MyAligner aligner = new MyAligner(sequenceA.Sequence, sequenceB.Sequence, banded, MaxCharactersToAlign);
    aligner.ExecuteAlignment();

    // Read the outcome in the same order as before: cost, then A, then B.
    int score = aligner.GetCost();
    string alignedA = aligner.GetAlignedSequenceA();
    string alignedB = aligner.GetAlignedSequenceB();

    result.Update(score, alignedA, alignedB);
    return result;
}
/// <summary>
/// Aligns the leading portions (at most MaxCharactersToAlign characters each) of two
/// gene sequences with a dynamic-programming edit-distance table
/// (indel = 5, substitution = 1, match = -3), optionally restricted to a band of
/// three cells on either side of the main diagonal.
/// </summary>
/// <param name="sequenceA">the first sequence</param>
/// <param name="sequenceB">the second sequence; its length may differ from that of the first.</param>
/// <param name="banded">true if alignment should be band limited.</param>
/// <returns>
/// a Result carrying the alignment score and the two alignment strings (each cut to at
/// most 100 characters). In banded mode, when the compared lengths differ by more than
/// the band radius, the score is int.MaxValue and both strings read
/// "No Alignment Possible".
/// </returns>
public ResultTable.Result Align_And_Extract(GeneSequence sequenceA, GeneSequence sequenceB, bool banded)
{
    const int indelCost = 5;
    const int substitutionCost = 1;
    const int matchCost = -3;
    const int bandRadius = 3;
    const int maxDisplayLength = 100;

    ResultTable.Result result = new ResultTable.Result();

    // Crop each sequence to the alignment limit and prefix a placeholder character so
    // that index 0 of each word stands for the empty prefix (row/column 0 of the table).
    string word1 = "-" + sequenceA.Sequence.Substring(0, Math.Min(MaxCharactersToAlign, sequenceA.Sequence.Length));
    string word2 = "-" + sequenceB.Sequence.Substring(0, Math.Min(MaxCharactersToAlign, sequenceB.Sequence.Length));
    int rows = word1.Length;
    int cols = word2.Length;

    // A band of radius 3 can only reach the bottom-right cell when the lengths differ
    // by at most 3, in either direction. Decide this up front instead of inferring it
    // from a zero score afterwards.
    if (banded && Math.Abs(rows - cols) > bandRadius)
    {
        result.Update(int.MaxValue, "No Alignment Possible", "No Alignment Possible");
        return result;
    }

    int[,] cost = new int[rows, cols];                   // best cost to reach each cell
    Direction[,] back = new Direction[rows, cols];       // back edge used to reach each cell
    cost[0, 0] = 0;
    back[0, 0] = Direction.Finish;

    // Unbanded: every cell is visited, O(n*m). Banded: at most 7 cells per row, O(n).
    for (int x = 0; x < rows; x++)
    {
        int firstCol = banded ? Math.Max(0, x - bandRadius) : 0;
        int lastCol = banded ? Math.Min(cols - 1, x + bandRadius) : cols - 1;

        for (int y = firstCol; y <= lastCol; y++)
        {
            if (x == 0 && y == 0)
            {
                continue; // origin is already initialised
            }

            int best = int.MaxValue;
            Direction dir = Direction.Finish;

            // Left neighbour (x, y - 1): usable only if it lies inside this row's range.
            if (y > firstCol)
            {
                best = cost[x, y - 1] + indelCost;
                dir = Direction.Left;
            }

            // Upper neighbour (x - 1, y): in banded mode it is outside the band when
            // y == x + bandRadius, and must not be read (it was never computed).
            if (x > 0 && (!banded || y < x + bandRadius))
            {
                int up = cost[x - 1, y] + indelCost;
                if (up <= best)
                {
                    best = up;
                    dir = Direction.Up;
                }
            }

            // Diagonal neighbour (x - 1, y - 1) is always inside the band.
            if (x > 0 && y > 0)
            {
                int step = word1[x] == word2[y] ? matchCost : substitutionCost;
                int diagonal = cost[x - 1, y - 1] + step;
                if (diagonal <= best)
                {
                    best = diagonal;
                    dir = Direction.Diagonal;
                }
            }

            cost[x, y] = best;
            back[x, y] = dir;
        }
    }

    int score = cost[rows - 1, cols - 1];

    // Walk the back edges from the last cell to the origin, O(n + m). Characters are
    // collected in reverse order and flipped afterwards.
    StringBuilder reversedA = new StringBuilder();
    StringBuilder reversedB = new StringBuilder();
    int i = rows - 1;
    int j = cols - 1;
    for (Direction move = back[i, j]; move != Direction.Finish; move = back[i, j])
    {
        switch (move)
        {
            case Direction.Left:
                reversedA.Append('-');
                reversedB.Append(word2[j]);
                j--;
                break;
            case Direction.Up:
                reversedA.Append(word1[i]);
                reversedB.Append('-');
                i--;
                break;
            case Direction.Diagonal:
                reversedA.Append(word1[i]);
                reversedB.Append(word2[j]);
                i--;
                j--;
                break;
            default:
                // Any other value would never advance i or j and loop forever.
                throw new InvalidOperationException("Unexpected direction in alignment back-pointer table.");
        }
    }

    // Reverse each string and keep only what can be displayed.
    string[] alignment = new string[2];
    StringBuilder[] reversed = { reversedA, reversedB };
    for (int k = 0; k < reversed.Length; k++)
    {
        char[] chars = reversed[k].ToString().ToCharArray();
        Array.Reverse(chars);
        alignment[k] = new string(chars, 0, Math.Min(chars.Length, maxDisplayLength));
    }

    result.Update(score, alignment[0], alignment[1]);
    return result;
}
/// <summary>
/// Aligns the leading portions (at most MaxCharactersToAlign characters each) of two
/// gene sequences with a dynamic-programming cost table, optionally restricted to a
/// band of three cells on either side of the main diagonal.
/// </summary>
/// <param name="sequenceA">the first sequence</param>
/// <param name="sequenceB">the second sequence; its length may differ from that of the first.</param>
/// <param name="banded">true if alignment should be band limited.</param>
/// <returns>
/// a Result carrying the alignment score and the two alignment strings. When the two
/// compared prefixes are identical the score is -3 per compared character and the
/// strings are the prefixes themselves. In banded mode, when the compared lengths
/// differ by more than 3, the score is int.MaxValue and both strings read
/// "No Alignment Possible".
/// </returns>
public ResultTable.Result Align_And_Extract(GeneSequence sequenceA, GeneSequence sequenceB, bool banded)
{
    const int bandRadius = 3;

    ResultTable.Result result = new ResultTable.Result();

    char[] a = sequenceA.Sequence.ToCharArray();
    char[] b = sequenceB.Sequence.ToCharArray();

    // Number of characters of each sequence that actually take part in the alignment.
    int lengthA = Math.Min(a.Length, MaxCharactersToAlign);
    int lengthB = Math.Min(b.Length, MaxCharactersToAlign);

    // Identical prefixes align purely along the diagonal. The score must be based on
    // the number of characters actually compared, which is smaller than
    // MaxCharactersToAlign whenever the sequences are shorter than that limit.
    string prefixA = sequenceA.Sequence.Substring(0, lengthA);
    string prefixB = sequenceB.Sequence.Substring(0, lengthB);
    if (string.Equals(prefixA, prefixB, StringComparison.Ordinal))
    {
        result.Update(lengthA * -3, prefixA, prefixB);
        return result;
    }

    // The band cannot reach the bottom-right cell if the lengths are too different.
    if (banded && Math.Abs(lengthA - lengthB) > bandRadius)
    {
        result.Update(int.MaxValue, "No Alignment Possible", "No Alignment Possible");
        return result;
    }

    // Space: O(n*m) for the cost table and the back-pointer table, in both modes.
    int[,] e = new int[lengthA + 1, lengthB + 1];
    char[,] previous = new char[lengthA + 1, lengthB + 1];

    // How far from the diagonal a cell may lie. In unbanded mode this is large enough
    // that no cell of the table is ever excluded, so one fill routine serves both modes.
    int reach = banded ? bandRadius : Math.Max(lengthA, lengthB);

    // Leftmost column: reached only by moving up, one indel per step.
    for (int i = 0; i <= Math.Min(lengthA, reach); i++)
    {
        e[i, 0] = i * indel_val;
        previous[i, 0] = up;
    }

    // Top row: reached only by moving left, one indel per step.
    for (int j = 1; j <= Math.Min(lengthB, reach); j++)
    {
        e[0, j] = j * indel_val;
        previous[0, j] = left;
    }

    // Fill the remaining cells row by row. Unbanded: O(n*m) time. Banded: at most
    // 2 * bandRadius + 1 cells per row, i.e. O(n) time.
    for (int i = 1; i <= lengthA; i++)
    {
        int start = Math.Max(i - reach, 1);
        int end = Math.Min(lengthB, i + reach);

        for (int j = start; j <= end; j++)
        {
            // A neighbour outside the band was never computed, so it is treated as
            // unreachable rather than read.
            int costUp = int.MaxValue;
            int costLeft = int.MaxValue;
            if (j != i + reach)
            {
                costUp = e[i - 1, j] + indel_val;
            }
            if (j != i - reach)
            {
                costLeft = e[i, j - 1] + indel_val;
            }
            int costDiag = e[i - 1, j - 1] + matcher(a[i - 1], b[j - 1]);

            int min = Math.Min(Math.Min(costUp, costLeft), costDiag);
            e[i, j] = min;

            // Ties are broken in favour of the diagonal, then up, then left.
            if (costDiag == min)
            {
                previous[i, j] = diag;
            }
            else if (costUp == min)
            {
                previous[i, j] = up;
            }
            else
            {
                previous[i, j] = left;
            }
        }
    }

    // The total minimum cost sits in the bottom-right cell.
    int score = e[lengthA, lengthB];

    // Rebuild the alignment strings from the back pointers.
    string[] alignment = makeAligments(lengthA, lengthB, previous, a, b);

    result.Update(score, alignment[0], alignment[1]);
    return result;
}
/// <summary> /// this is the function you implement. /// </summary> /// <param name="sequenceA">the first sequence</param> /// <param name="sequenceB">the second sequence, may have length not equal to the length of the first seq.</param> /// <param name="banded">true if alignment should be band limited.</param> /// <returns>the alignment score and the alignment (in a Result object) for sequenceA and sequenceB. The calling function places the result in the dispay appropriately. /// public ResultTable.Result Align_And_Extract(GeneSequence sequenceA, GeneSequence sequenceB, bool banded) { // --------------------------------------------------------------------------------------------- // Setup section. Also returns without calculation for banded analysis of sequences which will be impossible // O(1) int maxlength = MaxCharactersToAlign; const int indel = 5; const int sub = 1; const int match = -3; //Console.WriteLine("Sequence a: " + sequenceA.Sequence); //Console.WriteLine("Sequence b: " + sequenceB.Sequence); int lengthA, lengthB; if (sequenceA.Sequence.Length > maxlength) { lengthA = maxlength; } else { lengthA = sequenceA.Sequence.Length; } if (sequenceB.Sequence.Length > maxlength) { lengthB = maxlength; } else { lengthB = sequenceB.Sequence.Length; } ResultTable.Result result = new ResultTable.Result(); int score; // place your computed alignment score here string[] alignment = new string[2]; // place your two computed alignments here // these "alignments" are just the two strings, with "-" added where an insertion/deletion has occured. simple enough. 
// ********* these are placeholder assignments that you'll replace with your code ******* score = 0; alignment[0] = ""; alignment[1] = ""; // *************************************************************************************** // We will not be able to get a banded result if the lengths differ by more than 3 if (banded && Math.Abs(lengthA - lengthB) > 3) { score = int.MaxValue; result.Update(score, "No Alignment Possible", "No Alignment Possible"); return(result); } // Sequence length because it has to fit the string AND a space for the "empty string" at the beginning node[,] calcTable = new node[lengthA + 1, lengthB + 1]; // Go through the entire table to calculate the things. int a, b = 0; // do top left corner first calcTable[0, 0] = new node(-1, -1, 0); // END setup section // ----------------------------------------------------------------------------------------------------------- if (banded) { //-------------------------------------------------------------------------------------------------------- // Banded table scores calculation. // It goes through the length of A, each time doing 7 calculations for B. // O(7n) - where n is the length of A // Also note that the length difference between A and B is MAX 4, so O(n) // Do entire top row for (a = 1; a < 4; a++) { calcTable[a, 0] = new node(a - 1, 0, a * indel); } // And entire left row for (b = 1; b < 4; b++) { calcTable[0, b] = new node(0, b - 1, b * indel); } // and the rest for (a = 1; a < lengthA + 1; a++) { for (b = a - 3; b < a + 4; b++) { // Can only calculate in the band if (b < 1 || b > lengthB) { // Can't be having those System.IndexOutOfRangeExceptions, can we? continue; } int topCost, leftCost, diagCost; // Calculate cost of coming from top if (calcTable[a, b - 1] == null) { topCost = int.MaxValue; // null is very bad. 
} else { topCost = calcTable[a, b - 1].score + indel; // coming from top is an insert/delete } // Calculate cost of coming from left if (calcTable[a - 1, b] == null) { leftCost = int.MaxValue; // again, null is very bad } else { leftCost = calcTable[a - 1, b].score + indel; // coming from left is also an insert/delete } // Calculate cost of coming from the diagonal // We don't worry about nulls here, because they are impossible if (sequenceA.Sequence[a - 1] == sequenceB.Sequence[b - 1]) { // If the two strings match at this character diagCost = calcTable[a - 1, b - 1].score + match; // coming from diagonal on a match! } else { diagCost = calcTable[a - 1, b - 1].score + sub; // coming from diagonal on a substitution } // Now to make our table entry if (diagCost <= leftCost && diagCost <= topCost) { // Diagonal is cheapest calcTable[a, b] = new node(a - 1, b - 1, diagCost); } else if (leftCost <= diagCost && leftCost <= topCost) { // Left is cheapest calcTable[a, b] = new node(a - 1, b, leftCost); } else { // Top is cheapest calcTable[a, b] = new node(a, b - 1, topCost); } } } // END Banded table scores calculation //-------------------------------------------------------------------------------- } else // ends our if section for banded; starts our section to calculate unbanded // ---------------------------------------------------------------------------------- // Start unbanded table scores calculation // Goes through the length of A, each time going through the length of B. // Therefore O(n*m), where n is the length of A, and m is the length of B. 
// Do the entire top row first { for (a = 1; a < lengthA + 1; a++) { calcTable[a, 0] = new node(a - 1, 0, a * indel); } // and the entire left row for (b = 1; b < lengthB + 1; b++) { // Skipping the first one which was already done calcTable[0, b] = new node(0, b - 1, b * indel); } // And the rest for (a = 1; a < lengthA + 1; a++) { for (b = 1; b < lengthB + 1; b++) { // Calculate cost of coming from top int topCost = calcTable[a, b - 1].score + indel; // coming from top is an insert/delete // Calculate cost of coming from left int leftCost = calcTable[a - 1, b].score + indel; // coming from left is also an insert/delete // Calculate cost of coming from the diagonal int diagCost; if (sequenceA.Sequence[a - 1] == sequenceB.Sequence[b - 1]) { // If the two strings match at this character diagCost = calcTable[a - 1, b - 1].score + match; // coming from diagonal on a match! } else { diagCost = calcTable[a - 1, b - 1].score + sub; // coming from diagonal on a substitution } // Now to make our table entry if (diagCost <= leftCost && diagCost <= topCost) { // Diagonal is cheapest calcTable[a, b] = new node(a - 1, b - 1, diagCost); } else if (leftCost <= diagCost && leftCost <= topCost) { // Left is cheapest calcTable[a, b] = new node(a - 1, b, leftCost); } else { // Top is cheapest calcTable[a, b] = new node(a, b - 1, topCost); } } } //END unbanded calculation //--------------------------------------------------------------------------------------------- } // This ends the difference between banded and unbanded calculation // -------------------------------------------------------------------------------------------------------------------------- // At this point our scores table should be complete. 
Now we just take the final node and walk back to the beginning with it // The length of this string is the larger of the length of A and the length of B // O(max(n,m)) where n is A's length and m is B's length a = lengthA; b = lengthB; StringBuilder strA = new StringBuilder(maxlength); StringBuilder strB = new StringBuilder(maxlength); // Go until we hit either the top or left row/column while (a != 0 && b != 0) { int parent_a = calcTable[a, b].parent_x; int parent_b = calcTable[a, b].parent_y; if (parent_a < a) // To get here, we came from left //alignment[0].Insert(0,sequenceA.Sequence[a-1].ToString()); // Which means we used a char of sequence a { strA.Insert(0, sequenceA.Sequence[a - 1].ToString()); if (parent_b < b) // Also came from top = came from diagonal //alignment[1].Insert(0, sequenceB.Sequence[b-1].ToString()); // Whicn means we ALSO used a char of sequence b { strB.Insert(0, sequenceB.Sequence[b - 1].ToString()); } else // Only came from left //alignment[1].Insert(0, "-"); // Used a char of sequence a but not b { strB.Insert(0, "-"); } } else { //alignment[0].Insert(0, "-"); // Did not come from left nor diagonal; must have been from top strA.Insert(0, "-"); //alignment[1].Insert(0, sequenceB.Sequence[b-1].ToString()); strB.Insert(0, sequenceB.Sequence[b - 1].ToString()); } a = parent_a; b = parent_b; } // Assume we hit the left column. This means a = 0 and b is getting smaller // This means we have used all of sequence a already. while (b != 0) { int parent_a = 0; int parent_b = calcTable[a, b].parent_y; //alignment[0].Insert(0,"-"); // Already used all of a, so it has gaps at the beginning strA.Insert(0, "-"); //alignment[1].Insert(0, sequenceB.Sequence[b-1].ToString()); strB.Insert(0, sequenceB.Sequence[b - 1].ToString()); a = parent_a; b = parent_b; } // Assume we hit the top row. This means b = 0 and a is getting smaller // This means we have used all of sequence b already. 
while (a != 0) { int parent_a = calcTable[a, b].parent_x; int parent_b = 0; //alignment[0] = alignment[0].Insert(0, sequenceA.Sequence[a - 1].ToString()); // Already used all of a, so it has gaps at the beginning strA.Insert(0, sequenceA.Sequence[a - 1].ToString()); //alignment[1] = alignment[1].Insert(0, "-"); strB.Insert(0, "-"); a = parent_a; b = parent_b; } // END final string calculation //--------------------------------------------------------------------------------------------------- //--------------------------------------------------------------------------------------------------- // From here on out, we are just wrapping up the calculations and returning the results // O(1) // If we reach here, we should have traced our strings back to the beginning. The alignment strings should be all good and we just need to get the score score = calcTable[lengthA, lengthB].score; result.Update(score, strA.ToString(), strB.ToString());//alignment[0],alignment[1]); // bundling your results into the right object type return(result); }
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary>
/// Aligns two gene sequences and bundles the resulting score and alignment strings
/// into a <see cref="ResultTable.Result"/>.
/// </summary>
/// <param name="sequenceA">the first sequence</param>
/// <param name="sequenceB">the second sequence; its length may differ from that of the first sequence.</param>
/// <param name="banded">true to run the band-limited routine, false to run the unrestricted one.</param>
/// <returns>
/// a Result object updated with the alignment score and the two alignment strings for
/// sequenceA and sequenceB. The calling function places the result in the display appropriately.
/// </returns>
public ResultTable.Result Align_And_Extract(GeneSequence sequenceA, GeneSequence sequenceB, bool banded)
{
    ResultTable.Result result = new ResultTable.Result();

    // Defaults handed (by reference) to the selected routine, which is expected to overwrite them.
    int score = 0;
    string[] alignment = { "", "" };

    if (banded)
    {
        bandedAlgorithm(ref score, ref alignment, ref sequenceA, ref sequenceB);
    }
    else
    {
        unrestrictedAlgorithm(ref score, ref alignment, ref sequenceA, ref sequenceB);
    }

    // Package the score and both alignment strings into the object the caller expects.
    result.Update(score, alignment[0], alignment[1]);
    return result;
}
/// <summary>
/// Fills and displays every cell on or above the diagonal of the result table
/// (each pair x, y with y >= x). Off-diagonal cells are produced by
/// <c>PairWiseAlign.Align_And_Extract</c>; diagonal cells are filled directly
/// without running an alignment.
/// </summary>
private void fillMatrix()
{
    // Value typed into the band-length box; int.Parse throws if it is not a valid integer.
    int bandLength = int.Parse(bandlengthBox.Text);
    PairWiseAlign processor = new PairWiseAlign(bandLength);

    for (int row = 0; row < NUMBER_OF_SEQUENCES; ++row)
    {
        for (int col = row; col < NUMBER_OF_SEQUENCES; ++col)
        {
            ResultTable.Result cell;

            if (row != col)
            {
                // Two different sequences: run the alignment code.
                cell = processor.Align_And_Extract(m_sequences[row], m_sequences[col], bandCheckBox.Checked);
            }
            else
            {
                // A sequence compared with itself matches everywhere, so the cell is filled
                // with -3 * length, where length is capped at the aligner's character limit.
                cell = new ResultTable.Result();
                PairWiseAlign limits = new PairWiseAlign();
                int comparedLength = Math.Min(m_sequences[row].Sequence.Length, limits.getMaxhCharactersToAlign());
                string selfAlignment = m_sequences[row].Sequence.Substring(0, comparedLength);
                cell.Update(-3 * comparedLength, selfAlignment, selfAlignment);
            }

            m_resultTable.AddResult(row, col, cell);
            m_resultTable.DisplayResult(row, col);
        }
    }
}