/// <summary>
/// Computes the cost of cell [row, col] from its diagonal, upper and left neighbours and
/// records in <paramref name="prev"/> which neighbour supplied the chosen cost.
/// </summary>
/// <param name="matrix">Cost matrix; the three neighbouring cells are read from it.</param>
/// <param name="prev">Back-pointer matrix; entry [row, col] is set to DIAG, UP or LEFT.</param>
/// <param name="row">Row of the cell; character row - 1 of sequenceA is compared.</param>
/// <param name="col">Column of the cell; character col - 1 of sequenceB is compared.</param>
/// <param name="sequenceA">Sequence indexed by row.</param>
/// <param name="sequenceB">Sequence indexed by column.</param>
/// <returns>The smallest of the three candidate costs.</returns>
private int computeVal(int[,] matrix, int[,] prev, int row, int col, GeneSequence sequenceA, GeneSequence sequenceB)
{
    // A gap (from above or from the left) always adds 5.
    const int gapCost = 5;

    // Equal characters add -3 to the diagonal neighbour, different characters add 1.
    bool sameLetter = sequenceA.Sequence[row - 1] == sequenceB.Sequence[col - 1];
    int pairCost = sameLetter ? -3 : 1;

    int viaDiagonal = matrix[row - 1, col - 1] + pairCost;
    int viaAbove = matrix[row - 1, col] + gapCost;
    int viaLeft = matrix[row, col - 1] + gapCost;

    // Ties are resolved in favour of the diagonal first, then the cell above, then the cell to the left.
    if (viaDiagonal <= viaAbove && viaDiagonal <= viaLeft)
    {
        prev[row, col] = DIAG;
        return viaDiagonal;
    }

    if (viaAbove < viaDiagonal && viaAbove <= viaLeft)
    {
        prev[row, col] = UP;
        return viaAbove;
    }

    prev[row, col] = LEFT;
    return viaLeft;
}
/// <summary>
/// Fills the cells of <paramref name="matrix"/> that lie on the main diagonal, up to three
/// columns to its right, or up to three rows below it, by calling computeVal for each.
/// </summary>
/// <param name="matrix">Cost matrix with <paramref name="rows"/> by <paramref name="cols"/> cells.</param>
/// <param name="prev">Back-pointer matrix that computeVal fills alongside the costs.</param>
/// <param name="rows">Number of rows to consider.</param>
/// <param name="cols">Number of columns to consider.</param>
/// <param name="sequenceA">Sequence indexed by row.</param>
/// <param name="sequenceB">Sequence indexed by column.</param>
private void bandedAlg(int[,] matrix, int[,] prev, int rows, int cols, GeneSequence sequenceA, GeneSequence sequenceB)
{
    // When the dimensions differ by more than 3 nothing is computed; the bottom-right
    // cell is set to int.MaxValue instead.
    if (Math.Abs(rows - cols) > 3)
    {
        matrix[rows - 1, cols - 1] = int.MaxValue;
        return;
    }

    // Declared outside the try block so the handler can report where the failure happened.
    int i = 0;
    int j = 0;
    try
    {
        int diagonalLength = Math.Max(rows, cols);
        for (i = 1; i < diagonalLength; i++)
        {
            for (j = 0; j < 4; j++)
            {
                int shifted = i + j;

                // Cell j columns to the right of the diagonal.
                if (shifted < cols && i < rows)
                {
                    matrix[i, shifted] = computeVal(matrix, prev, i, shifted, sequenceA, sequenceB);
                }

                // Cell j rows below the diagonal.
                if (shifted < rows && i < cols)
                {
                    matrix[shifted, i] = computeVal(matrix, prev, shifted, i, sequenceA, sequenceB);
                }
            }
        }
    }
    catch (IndexOutOfRangeException)
    {
        // The failure is only reported on the console; the method then returns normally.
        Console.WriteLine("rows=" + rows + " cols=" + cols);
        Console.WriteLine("i=" + i + " j=" + j);
    }
}
/// <summary>
/// Computes only the cells within three positions of the main diagonal (to the right and below),
/// walking down the diagonal one step at a time.
/// </summary>
/// <param name="matrix">Cost matrix with <paramref name="rows"/> by <paramref name="cols"/> cells.</param>
/// <param name="prev">Back-pointer matrix that computeVal fills alongside the costs.</param>
/// <param name="rows">Number of rows to consider.</param>
/// <param name="cols">Number of columns to consider.</param>
/// <param name="sequenceA">Sequence indexed by row.</param>
/// <param name="sequenceB">Sequence indexed by column.</param>
private void bandedAlg(int[,] matrix, int[,] prev, int rows, int cols, GeneSequence sequenceA, GeneSequence sequenceB)
{
    const int bandReach = 3;

    // A size difference larger than the band is treated as "not aligned":
    // the score cell is set to int.MaxValue and nothing else is computed.
    if (Math.Abs(rows - cols) > bandReach)
    {
        matrix[rows - 1, cols - 1] = int.MaxValue;
        return;
    }

    // Iterate as far as the longer dimension so the bottom-right cell is reached.
    int diagonalEnd = Math.Max(rows, cols);
    for (int diag = 1; diag < diagonalEnd; diag++)
    {
        for (int offset = 0; offset <= bandReach; offset++)
        {
            int far = diag + offset;
            bool rightCellExists = far < cols && diag < rows;
            bool lowerCellExists = far < rows && diag < cols;

            if (rightCellExists)
            {
                matrix[diag, far] = computeVal(matrix, prev, diag, far, sequenceA, sequenceB);
            }

            if (lowerCellExists)
            {
                matrix[far, diag] = computeVal(matrix, prev, far, diag, sequenceA, sequenceB);
            }
        }
    }
}
/// <summary>
/// Inserts the name and sequence of <paramref name="geneSequence"/> as a new row of the DNA table.
/// </summary>
/// <param name="geneSequence">The sequence whose Name and Sequence values are written.</param>
/// <remarks>
/// The connection m_accessConn is opened for the insert and closed again whether or not the
/// insert succeeds. Exceptions raised while building or executing the command propagate to the caller.
/// </remarks>
public void WriteGeneSequence(GeneSequence geneSequence)
{
    // The values are bound as parameters because the sequence can be very long.
    const string insertCommandString = "INSERT INTO DNA (Name, Sequence) VALUES (?, ?)";

    try
    {
        // The command is disposed as soon as the insert has run.
        using (OleDbCommand insertCommand = new OleDbCommand(insertCommandString, m_accessConn))
        {
            // Parameters are added in the same order as the placeholders in the statement.
            insertCommand.Parameters.Add(new OleDbParameter("name", geneSequence.Name));
            insertCommand.Parameters.Add(new OleDbParameter("sequence", geneSequence.Sequence));

            m_accessConn.Open();
            insertCommand.ExecuteNonQuery();
        }
    }
    finally
    {
        m_accessConn.Close();
    }
}
/// <summary>
/// Computes the alignment score of two sequences using two rolling rows (prev and results).
/// </summary>
/// <param name="sequenceA">the first sequence</param>
/// <param name="sequenceB">the second sequence, may have length not equal to the length of the first seq.</param>
/// <param name="resultTableSoFar">the table of alignment results generated so far; not read by this method</param>
/// <param name="rowInTable">row of the result table this alignment belongs to; not read by this method</param>
/// <param name="columnInTable">column of the result table this alignment belongs to; not read by this method</param>
/// <returns>the last entry of the most recently completed row, truncated to an int. The calling function
/// places the result in entry rowInTable,columnInTable of the ResultTable</returns>
public int Score(GeneSequence sequenceA, GeneSequence sequenceB, ResultTable resultTableSoFar, int rowInTable, int columnInTable)
{
    initialize(sequenceA, sequenceB);

    int row = 1;
    while (row < height)
    {
        for (int col = 1; col < width; col++)
        {
            // Diagonal step: cheaper cost when the two characters are equal.
            bool sameSymbol = X[col - 1] == Y[row - 1];
            int pairCost = sameSymbol ? CharsMatchCost : SubstitutionCost;

            double fromAbove = prev[col] + InsertDeleteCost;
            double fromLeft = results[col - 1] + InsertDeleteCost;
            double fromDiagonal = prev[col - 1] + pairCost;

            results[col] = Math.Min(fromAbove, Math.Min(fromDiagonal, fromLeft));
        }

        // Row finished: advance the row counter and exchange the two row buffers.
        currentRow++;
        SwapArrays();
        row++;
    }

    // After the final swap the finished row is held in prev.
    return (int)prev[width - 1];
}
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary>
/// Aligns two sequences with either the banded or the unrestricted algorithm and bundles
/// the score and the two alignment strings into a Result.
/// </summary>
/// <param name="sequenceA">the first sequence</param>
/// <param name="sequenceB">the second sequence, may have length not equal to the length of the first seq.</param>
/// <param name="banded">true if alignment should be band limited.</param>
/// <returns>the alignment score and the alignment (in a Result object) for sequenceA and sequenceB.</returns>
public ResultTable.Result Align_And_Extract(GeneSequence sequenceA, GeneSequence sequenceB, bool banded)
{
    ResultTable.Result result = new ResultTable.Result();

    // Starting values; the selected algorithm receives them by reference and may overwrite them.
    int score = 0;
    string[] alignment = new string[] { "", "" };

    if (banded)
    {
        bandedAlgorithm(ref score, ref alignment, ref sequenceA, ref sequenceB);
    }
    else
    {
        unrestrictedAlgorithm(ref score, ref alignment, ref sequenceA, ref sequenceB);
    }

    // Bundle the results into the object type the caller expects.
    result.Update(score, alignment[0], alignment[1]);
    return result;
}
/**
 * Rebuilds the two aligned strings by following the back-pointer array from cell
 * [lengthOfSequenceA, lengthOfSequenceB] until cell [0, 0] is reached, then stores at most
 * the first 100 characters of each aligned string in alignment[0] and alignment[1].
 *
 * Characters are appended while walking backwards and the forward prefix is taken at the end,
 * so every step costs constant time. (Inserting at index 0 of a StringBuilder shifts the whole
 * buffer on every step, which made the previous version quadratic in the path length.)
 *
 * Time Complexity: O(n + m), where n and m are the two lengths; every step decrements at least
 * one of the two iterators.
 * Space Complexity: O(n + m) for the two builders.
 */
void createAlignments(ref string[] alignment, ref directions[,] prev, ref GeneSequence sequenceA, ref GeneSequence sequenceB, ref int lengthOfSequenceA, ref int lengthOfSequenceB)
{
    int rowIterator = lengthOfSequenceA, columnIterator = lengthOfSequenceB;

    // Both builders hold their alignment in reverse order (last aligned character first).
    StringBuilder firstReversed = new StringBuilder(), secondReversed = new StringBuilder();

    while (rowIterator != 0 || columnIterator != 0)
    {
        directions step = prev[rowIterator, columnIterator];
        if (step == directions.DIAGONAL) // match/sub
        {
            firstReversed.Append(sequenceA.Sequence[rowIterator - 1]);
            secondReversed.Append(sequenceB.Sequence[columnIterator - 1]);
            rowIterator--;
            columnIterator--;
        }
        else if (step == directions.LEFT) // insert
        {
            firstReversed.Append('-');
            secondReversed.Append(sequenceB.Sequence[columnIterator - 1]);
            columnIterator--;
        }
        else // delete
        {
            firstReversed.Append(sequenceA.Sequence[rowIterator - 1]);
            secondReversed.Append('-');
            rowIterator--;
        }
    }

    // Limiting the length of each string to 100 if it exceeds it
    alignment[0] = takeForwardPrefix(firstReversed, 100);
    alignment[1] = takeForwardPrefix(secondReversed, 100);
}

// Returns the first maxLength characters of the string whose reverse is held in the builder.
private static string takeForwardPrefix(StringBuilder reversed, int maxLength)
{
    int count = Math.Min(reversed.Length, maxLength);
    char[] forward = new char[count];
    for (int k = 0; k < count; k++)
    {
        // Character k of the forward string is character (Length - 1 - k) of the reversed one.
        forward[k] = reversed[reversed.Length - 1 - k];
    }
    return new string(forward);
}
/// <summary>
/// Reads <paramref name="fileName"/> and builds one GeneSequence from each of its first
/// NUMBER_OF_SEQUENCES non-empty lines. Each line is split at '#': the part before the first '#'
/// and the part after it are passed to the GeneSequence constructor in that order.
/// </summary>
/// <param name="fileName">Path of the file to read.</param>
/// <returns>
/// The sequences read, or null when reading the file contents fails with an I/O error
/// (a message is written to the console in that case).
/// </returns>
/// <remarks>
/// Failures to open the file propagate to the caller. Lines may end in "\r\n", "\n" or "\r".
/// </remarks>
private GeneSequence[] loadFile(string fileName)
{
    string input;

    // The using block closes the reader on every path, including the early return.
    using (StreamReader reader = new StreamReader(fileName))
    {
        try
        {
            input = reader.ReadToEnd();
        }
        catch (IOException)
        {
            Console.WriteLine("Error Parsing File...");
            return(null);
        }
    }

    // Splitting on both line-break characters keeps LF-only files from collapsing into one line.
    string[] inputLines = input.Split(new[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);

    GeneSequence[] temp = new GeneSequence[NUMBER_OF_SEQUENCES];
    for (int i = 0; i < NUMBER_OF_SEQUENCES; i++)
    {
        string[] line = inputLines[i].Split('#');
        temp[i] = new GeneSequence(line[0], line[1]);
    }
    return(temp);
}
/// <summary>
/// Shows the alignment of the two sequences that belong to the clicked cell: the aligned strings
/// go to outputConsole and the sequence names and shortened sequences go to sideBar.
/// </summary>
/// <param name="sender">The control that raised the event; not used.</param>
/// <param name="e">Supplies the row and column index of the clicked cell.</param>
private void dataGridViewResults_CellMouseClick(object sender, DataGridViewCellMouseEventArgs e)
{
    // Clicks on a row or column header report an index of -1, which is not a valid
    // position in m_sequences; such clicks are ignored.
    if (e.RowIndex < 0 || e.ColumnIndex < 0)
    {
        return;
    }

    GeneSequence seqA = m_sequences[e.ColumnIndex];
    GeneSequence seqB = m_sequences[e.RowIndex];

    // results[0] and results[1] are the aligned strings; results[2] is the per-position marker line.
    String[] results = processor.extractSolution(seqA, seqB);

    String outputMessage = String.Format("Output Console: {0}= MATCH, {1}= SUB, {2}= INDEL",
        processor.MATCH_CHAR, processor.SUB_CHAR, processor.INDEL_CHAR);

    String outputText = String.Format("{0}\r\nGene Alignment for Cell (Row:{1}, Col:{2})\r\nA: {3}\r\n {4}\r\nB: {5}",
        outputMessage,
        e.RowIndex + 1,
        e.ColumnIndex + 1,
        processor.formatSequence(results[0], MaxToDisplay),
        processor.formatSequence(results[2], MaxToDisplay),
        processor.formatSequence(results[1], MaxToDisplay));

    String sideText = String.Format("\r\n\r\nA: {0}\r\n\r\nB: {1}\r\n\r\nA: {2}\r\n\r\nB: {3}",
        seqA.Name,
        seqB.Name,
        processor.formatSequence(seqA.Sequence, 15),
        processor.formatSequence(seqB.Sequence, 15));

    sideBar.Text = sideText;
    outputConsole.Text = outputText;
}
/// <summary>
/// Recomputes the full cost table for the two sequences and traces back from its last cell
/// to produce the two aligned strings and a marker line describing each position.
/// </summary>
/// <param name="sequenceA">Sequence laid out along the rows of the table.</param>
/// <param name="sequenceB">Sequence laid out along the columns of the table.</param>
/// <returns>
/// Three strings: [0] aligned sequence A, [1] aligned sequence B, [2] one marker per position
/// (MATCH_CHAR, SUB_CHAR or INDEL_CHAR).
/// </returns>
/// <exception cref="ArgumentException">No neighbouring cell explains the value of a cell on the path.</exception>
public String[] extractSolution(GeneSequence sequenceA, GeneSequence sequenceB)
{
    // initialize arrays and strings and first row
    initialize(sequenceA, sequenceB);

    // every row is kept so the path can be traced back afterwards
    List<int[]> resultTable = new List<int[]>(charA.Length + 1);
    resultTable.Add(resultRow);
    for (int i = 0; i < charA.Length; i++)
    {
        resultTable.Add(computeNextRow(resultTable[i], charA[i], charB));
    }

    // the builders collect the alignment back to front; they are reversed at the end
    StringBuilder buildA = new StringBuilder();
    StringBuilder buildB = new StringBuilder();
    StringBuilder buildC = new StringBuilder();
    int row = charA.Length;
    int col = charB.Length;

    while (row != 0 || col != 0)
    {
        int here = resultTable[row][col];

        // A neighbour is only inspected when it exists: in column 0 there is no cell to the
        // left and in row 0 there is no cell above. Without these guards the trace read
        // index -1 as soon as it reached column 0 before row 0.
        if (col > 0 && here == resultTable[row][col - 1] + INDEL)
        {
            buildA.Append('-');
            buildB.Append(charB[--col]);
            buildC.Append(INDEL_CHAR);
        }
        else if (row > 0 && here == resultTable[row - 1][col] + INDEL)
        {
            buildA.Append(charA[--row]);
            buildB.Append('-');
            buildC.Append(INDEL_CHAR);
        }
        else if (row > 0 && col > 0 &&
                 (here == resultTable[row - 1][col - 1] + MATCH || here == resultTable[row - 1][col - 1] + SUB))
        {
            buildA.Append(charA[--row]);
            buildB.Append(charB[--col]);
            buildC.Append(charB[col] == charA[row] ? MATCH_CHAR : SUB_CHAR);
        }
        else
        {
            throw new ArgumentException();
        }
    }

    String[] results = new String[3];
    results[0] = reverseString(buildA.ToString());
    results[1] = reverseString(buildB.ToString());
    results[2] = reverseString(buildC.ToString());
    return(results);
}
/// <summary>
/// Scores the alignment of two sequences with a Grid, but only for result-table cells whose
/// column index is not smaller than their row index.
/// </summary>
/// <param name="sequenceA">the first sequence</param>
/// <param name="sequenceB">the second sequence, may have length not equal to the length of the first seq.</param>
/// <param name="resultTableSoFar">the table of alignment results generated so far; not read by this method</param>
/// <param name="rowInTable">this particular alignment problem will occupy a cell in this row of the result table.</param>
/// <param name="columnInTable">this particular alignment will occupy a cell in this column of the result table.</param>
/// <returns>0 when columnInTable - rowInTable is negative; otherwise the value returned by
/// Grid.CalculateScoreSolution. The calling function places the result in entry rowInTable,columnInTable
/// of the ResultTable</returns>
public int Align(GeneSequence sequenceA, GeneSequence sequenceB, ResultTable resultTableSoFar, int rowInTable, int columnInTable)
{
    // Cells with a negative column-minus-row difference are not computed.
    bool skipCell = (columnInTable - rowInTable) < 0;
    if (skipCell)
    {
        return 0;
    }

    Grid scoringGrid = new Grid(sequenceA.Sequence, sequenceB.Sequence, true, MaxCharactersToAlign);
    int alignmentScore = scoringGrid.CalculateScoreSolution();
    return alignmentScore;
}
/// <summary>
/// Fills every cell of <paramref name="matrix"/> except row 0 and column 0, row by row,
/// using computeVal for each cell.
/// </summary>
/// <param name="matrix">Cost matrix to fill.</param>
/// <param name="prev">Back-pointer matrix that computeVal fills alongside the costs.</param>
/// <param name="rows">Number of rows to process.</param>
/// <param name="cols">Number of columns to process.</param>
/// <param name="sequenceA">Sequence indexed by row.</param>
/// <param name="sequenceB">Sequence indexed by column.</param>
private void unrestricted(int[,] matrix, int[,] prev, int rows, int cols, GeneSequence sequenceA, GeneSequence sequenceB)
{
    for (int row = 1; row < rows; row++)
    {
        for (int col = 1; col < cols; col++)
        {
            int cellCost = computeVal(matrix, prev, row, col, sequenceA, sequenceB);
            matrix[row, col] = cellCost;
        }
    }
}
/// <summary>
/// Aligns the leading part of two sequences (at most MaxCharactersToAlign characters each) with
/// an EditDistance object and returns the score together with the two alignment strings.
/// </summary>
/// <param name="sequenceA">the first sequence</param>
/// <param name="sequenceB">the second sequence, may have length not equal to the length of the first seq.</param>
/// <param name="banded">true if alignment should be band limited.</param>
/// <returns>the alignment score and the alignment (in a Result object) for sequenceA and sequenceB.
/// For a banded run whose two lengths differ by more than Bandwidth, the score is int.MaxValue and
/// both alignment strings read "No Alignment Possible".</returns>
public ResultTable.Result Align_And_Extract(GeneSequence sequenceA, GeneSequence sequenceB, bool banded)
{
    ResultTable.Result result = new ResultTable.Result();

    // Number of characters of each sequence that take part in the alignment.
    int lengthA = Math.Min(sequenceA.Sequence.Length, MaxCharactersToAlign);
    int lengthB = Math.Min(sequenceB.Sequence.Length, MaxCharactersToAlign);

    // A banded alignment is not attempted when the lengths are further apart than the bandwidth.
    if (banded && Math.Abs(lengthB - lengthA) > Bandwidth)
    {
        result.Update(int.MaxValue, "No Alignment Possible", "No Alignment Possible");
        return result;
    }

    EditDistance editor = new EditDistance(sequenceA.Sequence.Substring(0, lengthA), sequenceB.Sequence.Substring(0, lengthB));

    string[] alignment;
    if (banded)
    {
        editor.setupBanded();
        alignment = editor.bandedResults();
    }
    else
    {
        editor.setupUnbanded();
        alignment = editor.results();
    }
    int score = editor.value();

    // Bundle the results into the object type the caller expects.
    result.Update(score, alignment[0], alignment[1]);
    return result;
}
/// <summary>
/// Computes the value of every cell from the top left to the bottom right, one row at a time,
/// skipping row 0 and column 0.
/// </summary>
/// <param name="matrix">Cost matrix to fill.</param>
/// <param name="prev">Back-pointer matrix that computeVal fills alongside the costs.</param>
/// <param name="rows">Number of rows to process.</param>
/// <param name="cols">Number of columns to process.</param>
/// <param name="sequenceA">Sequence indexed by row.</param>
/// <param name="sequenceB">Sequence indexed by column.</param>
private void unrestricted(int[,] matrix, int[,] prev, int rows, int cols, GeneSequence sequenceA, GeneSequence sequenceB)
{
    int r = 1;
    while (r < rows)
    {
        int c = 1;
        while (c < cols)
        {
            matrix[r, c] = computeVal(matrix, prev, r, c, sequenceA, sequenceB);
            c++;
        }
        r++;
    }
}
/// <summary>
/// Computes the alignment score of two sequences (at most MaxCharactersToAlign characters each)
/// keeping only two rows of the cost table in memory.
/// </summary>
/// <param name="sequenceA">the first sequence</param>
/// <param name="sequenceB">the second sequence, may have length not equal to the length of the first seq.</param>
/// <param name="resultTableSoFar">the table of alignment results generated so far; not read by this method</param>
/// <param name="rowInTable">this particular alignment problem will occupy a cell in this row of the result table.</param>
/// <param name="columnInTable">this particular alignment will occupy a cell in this column of the result table.</param>
/// <returns>0 when rowInTable is not greater than columnInTable; otherwise the alignment score.
/// The calling function places the result in entry rowInTable,columnInTable of the ResultTable</returns>
public int Align(GeneSequence sequenceA, GeneSequence sequenceB, ResultTable resultTableSoFar, int rowInTable, int columnInTable)
{
    // Cells on or above the diagonal are not computed.
    // NOTE(review): an earlier comment here read "only fill in above the diagonal", which is the
    // opposite of what this condition does - confirm which half of the table is intended.
    if (rowInTable <= columnInTable)
    {
        return 0;
    }

    const int gapCost = 5;
    const int matchCost = -3;
    const int mismatchCost = 1;

    string a = sequenceA.Sequence;
    string b = sequenceB.Sequence;
    int m = Math.Min(a.Length, MaxCharactersToAlign);
    int n = Math.Min(b.Length, MaxCharactersToAlign);

    int[] finishedRow = new int[MaxCharactersToAlign + 1];
    int[] workingRow = new int[MaxCharactersToAlign + 1];

    // Row 0: reaching column j costs j gaps.
    for (int j = 0; j <= n; j++)
    {
        finishedRow[j] = gapCost * j;
    }

    for (int i = 1; i <= m; i++)
    {
        // Column 0 can only be reached from the cell above.
        workingRow[0] = finishedRow[0] + gapCost;

        for (int j = 1; j <= n; j++)
        {
            int viaGap = Math.Min(workingRow[j - 1] + gapCost, finishedRow[j] + gapCost);
            int pairCost = (a[i - 1] == b[j - 1]) ? matchCost : mismatchCost;
            workingRow[j] = Math.Min(viaGap, finishedRow[j - 1] + pairCost);
        }

        // Exchange the rows; the old finished row is overwritten during the next pass.
        int[] swap = finishedRow;
        finishedRow = workingRow;
        workingRow = swap;
    }

    // The last element of the last filled row is the score.
    return finishedRow[n];
}
/// <summary>
/// Prepares the fields used by the alignment: charA and charB receive the characters of the two
/// sequences after formatSequence has been applied with MaxCharactersToAlign, and resultRow
/// becomes the first table row, where entry i holds i * INDEL.
/// </summary>
/// <param name="sequenceA">Sequence stored in charA.</param>
/// <param name="sequenceB">Sequence stored in charB; its length determines the row width.</param>
private void initialize(GeneSequence sequenceA, GeneSequence sequenceB)
{
    // presumably formatSequence limits each sequence to MaxCharactersToAlign characters - verify against its definition
    string limitedA = formatSequence(sequenceA.Sequence, MaxCharactersToAlign);
    charA = limitedA.ToCharArray();
    string limitedB = formatSequence(sequenceB.Sequence, MaxCharactersToAlign);
    charB = limitedB.ToCharArray();

    // one entry per character of B plus the leading base-case entry
    int[] firstRow = new int[charB.Length + 1];
    for (int col = 0; col < firstRow.Length; col++)
    {
        firstRow[col] = col * INDEL;
    }
    resultRow = firstRow;
}
/// <summary>
/// Computes the alignment score of two sequences while keeping only the current table row.
/// </summary>
/// <param name="sequenceA">Sequence whose characters each produce one new row.</param>
/// <param name="sequenceB">Sequence laid out along the row.</param>
/// <returns>The last entry of the final row.</returns>
public int Align(GeneSequence sequenceA, GeneSequence sequenceB)
{
    // set up charA, charB and the first row
    initialize(sequenceA, sequenceB);

    // each character of A replaces the current row with the next one
    int index = 0;
    while (index < charA.Length)
    {
        resultRow = computeNextRow(resultRow, charA[index], charB);
        index++;
    }

    int lastColumn = resultRow.Length - 1;
    return resultRow[lastColumn];
}
// Above are functions dealing with the alignment, below are functions dealing with the extraction

/// <summary>
/// Recomputes the full cost table for the two sequences and traces back from its last cell
/// to produce the two aligned strings.
/// </summary>
/// <param name="sequenceA">Sequence laid out along the rows of the table.</param>
/// <param name="sequenceB">Sequence laid out along the columns of the table.</param>
/// <returns>Two strings: [0] aligned sequence A, [1] aligned sequence B ('-' marks a gap).</returns>
/// <exception cref="ArgumentException">No neighbouring cell explains the value of a cell on the path.</exception>
public String[] extractSequences(GeneSequence sequenceA, GeneSequence sequenceB)
{
    // set up backtrace
    initializeSequencing(sequenceA, sequenceB);

    // the whole table is rebuilt row by row and kept, because the trace needs every row
    List<int[]> resultTable = new List<int[]>(charA.Length + 1);
    resultTable.Add(resultRow);
    for (int i = 0; i < charA.Length; i++)
    {
        resultTable.Add(createNextRow(resultTable[i], charA[i], charB));
    }

    // the strings are collected in reverse order while walking from the end to the beginning
    StringBuilder one = new StringBuilder();
    StringBuilder two = new StringBuilder();
    int row = charA.Length;
    int col = charB.Length;

    while (row != 0 || col != 0)
    {
        int current = resultTable[row][col];

        // Each neighbour is checked only if it exists. Previously the left neighbour was read
        // unconditionally, so reaching column 0 before row 0 indexed position -1.
        bool cameFromLeft = col > 0 && current == resultTable[row][col - 1] + INDEL;
        bool cameFromAbove = !cameFromLeft && row > 0 && current == resultTable[row - 1][col] + INDEL;
        bool cameFromDiagonal = !cameFromLeft && !cameFromAbove && row > 0 && col > 0 &&
            (current == resultTable[row - 1][col - 1] + MATCH || current == resultTable[row - 1][col - 1] + SUB);

        if (cameFromLeft)
        {
            one.Append('-');
            two.Append(charB[--col]);
        }
        else if (cameFromAbove)
        {
            one.Append(charA[--row]);
            two.Append('-');
        }
        else if (cameFromDiagonal)
        {
            one.Append(charA[--row]);
            two.Append(charB[--col]);
        }
        else
        {
            throw new ArgumentException();
        }
    }

    String[] results = new String[2];
    results[0] = reverseString(one.ToString());
    results[1] = reverseString(two.ToString());
    return(results);
}
/// <summary>
/// Builds the complete cost table for the two sequences, then walks back from the bottom-right
/// cell to recover the aligned strings and a marker for every aligned position.
/// </summary>
/// <param name="sequenceA">Sequence laid out along the rows of the table.</param>
/// <param name="sequenceB">Sequence laid out along the columns of the table.</param>
/// <returns>
/// Three strings: [0] aligned sequence A, [1] aligned sequence B, [2] one marker per position
/// (MATCH_CHAR, SUB_CHAR or INDEL_CHAR).
/// </returns>
/// <exception cref="ArgumentException">No neighbouring cell explains the value of a cell on the path.</exception>
public String[] extractSolution(GeneSequence sequenceA, GeneSequence sequenceB)
{
    // initialize arrays and strings and first row
    initialize(sequenceA, sequenceB);

    // calculate each additional row and save it for the back trace
    List<int[]> resultTable = new List<int[]>(charA.Length + 1);
    resultTable.Add(resultRow);
    for (int i = 0; i < charA.Length; i++)
    {
        resultTable.Add(computeNextRow(resultTable[i], charA[i], charB));
    }

    // output is gathered last-position-first and reversed before returning
    StringBuilder buildA = new StringBuilder();
    StringBuilder buildB = new StringBuilder();
    StringBuilder buildC = new StringBuilder();
    int row = charA.Length;
    int col = charB.Length;

    // until both indices are back at zero, work out which operation produced the current cell
    while (row != 0 || col != 0)
    {
        int cell = resultTable[row][col];
        bool hasLeft = col > 0;   // column 0 has no left neighbour
        bool hasAbove = row > 0;  // row 0 has no upper neighbour

        // The existence checks stop the trace from reading index -1 once it reaches the
        // first column while rows remain.
        if (hasLeft && cell == resultTable[row][col - 1] + INDEL)
        {
            buildA.Append('-');
            buildB.Append(charB[--col]);
            buildC.Append(INDEL_CHAR);
        }
        else if (hasAbove && cell == resultTable[row - 1][col] + INDEL)
        {
            buildA.Append(charA[--row]);
            buildB.Append('-');
            buildC.Append(INDEL_CHAR);
        }
        else if (hasLeft && hasAbove &&
                 (cell == resultTable[row - 1][col - 1] + MATCH || cell == resultTable[row - 1][col - 1] + SUB))
        {
            buildA.Append(charA[--row]);
            buildB.Append(charB[--col]);
            buildC.Append(charB[col] == charA[row] ? MATCH_CHAR : SUB_CHAR);
        }
        else
        {
            throw new ArgumentException();
        }
    }

    String[] results = new String[3];
    results[0] = reverseString(buildA.ToString());
    results[1] = reverseString(buildB.ToString());
    results[2] = reverseString(buildC.ToString());
    return results;
}
/// <summary>
/// Sets up the fields used for sequencing: charA and charB receive the characters of the two
/// sequences after stringLimit has been applied with MaxCharactersToAlign, and resultRow
/// becomes the first table row, where entry i holds i * INDEL.
/// </summary>
/// <param name="sequenceA">Sequence stored in charA.</param>
/// <param name="sequenceB">Sequence stored in charB; its length determines the row width.</param>
private void initializeSequencing(GeneSequence sequenceA, GeneSequence sequenceB)
{
    // presumably stringLimit keeps at most MaxCharactersToAlign characters - verify against its definition
    string headOfA = stringLimit(sequenceA.Sequence, MaxCharactersToAlign);
    charA = headOfA.ToCharArray();
    string headOfB = stringLimit(sequenceB.Sequence, MaxCharactersToAlign);
    charB = headOfB.ToCharArray();

    // base-case row: position k costs k INDELs
    int[] baseRow = new int[charB.Length + 1];
    int k = 0;
    while (k < baseRow.Length)
    {
        baseRow[k] = k * INDEL;
        k++;
    }
    resultRow = baseRow;
}
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////// Unrestricted Algorithm ////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////

/**
 * Runs the unrestricted dynamic-programming alignment on the two sequences, writes the score of
 * the best alignment to `score` and lets createAlignments fill `alignment`.
 * Time Complexity: O(nm) where n and m are the (capped) lengths of the two sequences, because
 * every cell of the (n + 1) x (m + 1) table is visited once.
 * Space Complexity: O(nm), because a value table and a direction table of that size are allocated.
 */
void unrestrictedAlgorithm(ref int score, ref string[] alignment, ref GeneSequence sequenceA, ref GeneSequence sequenceB)
{
    const int gapCost = 5;
    const int matchCost = -3;
    const int mismatchCost = 1;

    // Neither sequence contributes more than MaxCharactersToAlign characters.
    int lengthOfSequenceA = Math.Min(sequenceA.Sequence.Length, MaxCharactersToAlign);
    int lengthOfSequenceB = Math.Min(sequenceB.Sequence.Length, MaxCharactersToAlign);

    // One table for the costs, one for the direction each cost came from.
    int[,] values = new int[lengthOfSequenceA + 1, lengthOfSequenceB + 1];
    directions[,] prev = new directions[lengthOfSequenceA + 1, lengthOfSequenceB + 1];

    // Row 0 and column 0 are prepared by the helper.
    fillStartCells(ref values, ref prev, lengthOfSequenceA, lengthOfSequenceB, false);

    string lettersA = sequenceA.Sequence;
    string lettersB = sequenceB.Sequence;

    for (int row = 1; row <= lengthOfSequenceA; row++)
    {
        for (int column = 1; column <= lengthOfSequenceB; column++)
        {
            int pairCost = (lettersA[row - 1] == lettersB[column - 1]) ? matchCost : mismatchCost;
            int viaDiagonal = values[row - 1, column - 1] + pairCost;
            int viaLeft = values[row, column - 1] + gapCost;   // insert
            int viaTop = values[row - 1, column] + gapCost;    // delete

            // Start from the diagonal and switch only to a strictly cheaper neighbour, so ties
            // prefer the diagonal, then the left cell, then the top cell.
            int best = viaDiagonal;
            directions origin = directions.DIAGONAL;
            if (viaLeft < best)
            {
                best = viaLeft;
                origin = directions.LEFT;
            }
            if (viaTop < best)
            {
                best = viaTop;
                origin = directions.TOP;
            }

            values[row, column] = best;
            prev[row, column] = origin;
        }
    }

    // The score is the value of the last cell.
    score = values[lengthOfSequenceA, lengthOfSequenceB];

    createAlignments(ref alignment, ref prev, ref sequenceA, ref sequenceB, ref lengthOfSequenceA, ref lengthOfSequenceB);
}
/// <summary>
/// Scores the alignment of two sequences by repeatedly replacing the current table row
/// with the next one.
/// </summary>
/// <param name="sequenceA">Sequence whose characters each produce one new row.</param>
/// <param name="sequenceB">Sequence laid out along the row.</param>
/// <returns>The last entry of the final row.</returns>
public int Align(GeneSequence sequenceA, GeneSequence sequenceB)
{
    // prepare charA, charB and the first row
    initialize(sequenceA, sequenceB);

    // one pass per character of A
    for (int position = 0; position < charA.Length; position++)
    {
        char current = charA[position];
        resultRow = computeNextRow(resultRow, current, charB);
    }

    // the score sits at the end of the last row
    int scoreIndex = resultRow.Length - 1;
    return resultRow[scoreIndex];
}
/// <summary>
/// Stores the two sequences in X and Y, allocates the two row buffers and fills the first row:
/// entry i (for i from 1) is InsertDeleteCost * i. The row counter is then advanced and the
/// buffers are exchanged through SwapArrays.
/// </summary>
/// <param name="aSequence">Sequence stored in X.</param>
/// <param name="bSequence">Sequence stored in Y.</param>
public void initialize(GeneSequence aSequence, GeneSequence bSequence)
{
    X = aSequence.Sequence;
    Y = bSequence.Sequence;
    setDimensions();

    prev = new double[width];
    results = new double[width];

    // Entry 0 keeps its default value of 0.
    int col = 1;
    while (col < width)
    {
        results[col] = InsertDeleteCost * col;
        ++col;
    }

    currentRow++;
    SwapArrays();
}
/// <summary>
/// Creates the two-row table for a pair of sequences: stores them in X and Y, allocates both
/// row buffers, writes InsertDeleteCost * i into cell i (for i from 1) through SetCell, then
/// advances the row counter and exchanges the buffers.
/// </summary>
/// <param name="aSequence">Sequence stored in X.</param>
/// <param name="bSequence">Sequence stored in Y.</param>
public dpRows(GeneSequence aSequence, GeneSequence bSequence)
{
    X = aSequence.Sequence;
    Y = bSequence.Sequence;
    setDimensions();

    prev = new double[width];
    results = new double[width];

    // Cell 0 is left at its default value.
    int cell = 1;
    while (cell < width)
    {
        this.SetCell(cell, InsertDeleteCost * cell);
        ++cell;
    }

    currentRow++;
    this.SwapArrays();
}
/// <summary>
/// Aligns two sequences with a full cost matrix (banded or unrestricted) and returns the score
/// together with the two alignment strings, each shortened to at most 100 characters.
/// </summary>
/// <param name="sequenceA">the first sequence</param>
/// <param name="sequenceB">the second sequence, may have length not equal to the length of the first seq.</param>
/// <param name="banded">true if alignment should be band limited.</param>
/// <returns>the alignment score and the alignment (in a Result object) for sequenceA and sequenceB.</returns>
public ResultTable.Result Align_And_Extract(GeneSequence sequenceA, GeneSequence sequenceB, bool banded)
{
    ResultTable.Result result = new ResultTable.Result();
    string[] alignment = new string[] { "", "" };

    // Each matrix dimension is the sequence length plus one, capped at 15001 for a banded run
    // and at MaxCharactersToAlign for an unrestricted run.
    // NOTE(review): if 15001 stands for 15000 characters plus the base-case row, the
    // unrestricted cap may be one cell short - confirm the intended limit.
    int maxLengthVal = banded ? 15001 : MaxCharactersToAlign;
    int rows = Math.Min(maxLengthVal, sequenceA.Sequence.Length + 1);
    int cols = Math.Min(maxLengthVal, sequenceB.Sequence.Length + 1);

    // Cost matrix and the matrix that tracks the path.
    int[,] matrix = new int[rows, cols];
    int[,] prev = new int[rows, cols];
    initializeMatrices(matrix, prev, rows, cols);

    if (banded)
    {
        bandedAlg(matrix, prev, rows, cols, sequenceA, sequenceB);
    }
    else
    {
        unrestricted(matrix, prev, rows, cols, sequenceA, sequenceB);
    }

    // The score is read from the last cell.
    int score = matrix[rows - 1, cols - 1];

    // The alignment strings are derived from the path stored in prev.
    findAlignments(alignment, prev, rows, cols, score, sequenceA.Sequence, sequenceB.Sequence);

    // Only the first 100 characters of each string are displayed.
    for (int k = 0; k < 2; k++)
    {
        if (alignment[k].Length > 100)
        {
            alignment[k] = alignment[k].Substring(0, 100);
        }
    }

    result.Update(score, alignment[0], alignment[1]);
    return result;
}
/// <summary>
/// Prepares a sequencer for two sequences. Sequence A is the "top" word and determines the
/// number of columns; sequence B is the "side" word and determines the number of rows.
/// </summary>
/// <param name="sequenceA">Sequence laid out along the columns.</param>
/// <param name="sequenceB">Sequence laid out along the rows.</param>
/// <param name="maxSize">Upper limit on the number of characters of each sequence that is used.</param>
public GenomeSequencer(GeneSequence sequenceA, GeneSequence sequenceB, int maxSize)
{
    this.sequenceA = sequenceA;
    this.sequenceB = sequenceB;

    // One is added to each dimension for the base-case row and column.
    rowSize = Math.Min(maxSize, sequenceB.Sequence.Length) + 1;
    colSize = Math.Min(maxSize, sequenceA.Sequence.Length) + 1;

    // Set up the sequencer arrays used when calculating the cost of the sequences.
    initializeAlignmentCost();
    initializePreviousMatrix();
}
/// <summary>
/// Shows, in outputConsole, the alignment of the two sequences that belong to the clicked cell.
/// Each aligned string is passed through stringLimit with a limit of 100 before display.
/// </summary>
/// <param name="sender">The control that raised the event; not used.</param>
/// <param name="e">Supplies the row and column index of the clicked cell.</param>
private void dataGridViewResults_CellClick(object sender, DataGridViewCellEventArgs e)
{
    // Clicks on a row or column header report an index of -1, which is not a valid
    // position in m_sequences; such clicks are ignored.
    if (e.RowIndex < 0 || e.ColumnIndex < 0)
    {
        return;
    }

    GeneSequence sequenceA = this.m_sequences[e.ColumnIndex];
    GeneSequence sequenceB = this.m_sequences[e.RowIndex];

    String[] results = processor.extractSequences(sequenceA, sequenceB);

    // The cell position is shown 1-based.
    String outputText = "Output Console:";
    outputText += "\r\nCell (";
    outputText += (e.RowIndex + 1) + ", ";
    outputText += (e.ColumnIndex + 1) + ")";
    outputText += "\r\nSequence A: " + processor.stringLimit(results[0], 100);
    outputText += "\r\nSequence B: " + processor.stringLimit(results[1], 100);

    outputConsole.Text = outputText;
}
/// <summary>
/// Creates the full table for a pair of sequences and fills its base cases: cell (0, 0) is the
/// start with cost 0, the remaining cells with second index 0 cost indel * i and are labelled
/// "left", and the remaining cells with first index 0 cost indel * j and are labelled "top".
/// </summary>
/// <param name="aSequence">Sequence stored in X.</param>
/// <param name="bSequence">Sequence stored in Y.</param>
public dpTable(GeneSequence aSequence, GeneSequence bSequence)
{
    X = aSequence.Sequence;
    Y = bSequence.Sequence;
    setDimensions();

    results = new node[width, height];

    this.SetCell(0, 0, 0, "start");

    int column = 1;
    while (column < width)
    {
        this.SetCell(column, 0, indel * column, "left");
        ++column;
    }

    int line = 1;
    while (line < height)
    {
        this.SetCell(0, line, indel * line, "top");
        ++line;
    }
}
/// <summary>
/// Stores the two sequences and the banded flag, and allocates the back-pointer table (prev)
/// and the distance table (dis). Each dimension is size + 1, reduced to the sequence length + 1
/// when the sequence is shorter than that.
/// </summary>
/// <param name="sequenceA">Sequence that determines the number of rows.</param>
/// <param name="sequenceB">Sequence that determines the number of columns.</param>
/// <param name="banded">Stored in the banded field.</param>
/// <param name="size">Largest number of characters of each sequence to cover.</param>
public Algo(GeneSequence sequenceA, GeneSequence sequenceB, bool banded, int size)
{
    seqA = sequenceA;
    seqB = sequenceB;
    this.banded = banded;

    // The extra cell in each dimension is the base-case row / column.
    int cappedCells = size + 1;
    this.sizeRow = Math.Min(cappedCells, sequenceA.Sequence.Length + 1);
    this.sizeCol = Math.Min(cappedCells, sequenceB.Sequence.Length + 1);

    prev = new char[this.sizeRow, this.sizeCol];
    dis = new int[this.sizeRow, this.sizeCol];
}
/// <summary>
/// Computes the alignment score of two sequences with a GenomeSequencer. The two alignment
/// strings of the returned Result are always empty.
/// </summary>
/// <param name="sequenceA">the first sequence</param>
/// <param name="sequenceB">the second sequence, may have length not equal to the length of the first seq.</param>
/// <param name="banded">true if alignment should be band limited.</param>
/// <returns>the alignment score (in a Result object) for sequenceA and sequenceB, with empty alignment strings.</returns>
public ResultTable.Result Align_And_Extract(GeneSequence sequenceA, GeneSequence sequenceB, bool banded)
{
    ResultTable.Result result = new ResultTable.Result();

    GenomeSequencer genomeSequencer = new GenomeSequencer(sequenceA, sequenceB, MaxCharactersToAlign);
    int score = genomeSequencer.calculateSequenceCost(banded);

    // No alignment strings are produced here; both are reported as empty.
    string firstAlignment = "";
    string secondAlignment = "";

    result.Update(score, firstAlignment, secondAlignment);
    return result;
}
/// <summary>
/// Computes the alignment score of two sequences, keeping only the current table row.
/// </summary>
/// <param name="sequenceA">the first sequence</param>
/// <param name="sequenceB">the second sequence, may have length not equal to the length of the first seq.</param>
/// <param name="resultTableSoFar">the table of alignment results generated so far; not read by this method</param>
/// <param name="rowInTable">row of the result table this alignment belongs to; not read by this method</param>
/// <param name="columnInTable">column of the result table this alignment belongs to; not read by this method</param>
/// <returns>the last entry of the final row. The calling function places the result in entry
/// rowInTable,columnInTable of the ResultTable</returns>
public int Align(GeneSequence sequenceA, GeneSequence sequenceB, ResultTable resultTableSoFar, int rowInTable, int columnInTable)
{
    // set up charA, charB and the first row
    initializeSequencing(sequenceA, sequenceB);

    // One call to createNextRow per character of the first sequence; only one row is
    // referenced from resultRow at any time.
    int position = 0;
    while (position < charA.Length)
    {
        resultRow = createNextRow(resultRow, charA[position], charB);
        position++;
    }

    // the score is the final entry of the last row
    int lastIndex = resultRow.Length - 1;
    return resultRow[lastIndex];
}
/// <summary>
/// Aligns two gene sequences with the DynamicProgramming helper and returns both the
/// score and the aligned strings.
/// </summary>
/// <param name="cell">passed straight through to DynamicProgramming; its meaning is not visible here.</param>
/// <param name="sequenceA">the first sequence</param>
/// <param name="sequenceB">the second sequence; its length may differ from the first.</param>
/// <param name="banded">true if alignment should be band limited.</param>
/// <returns>a Result holding the score and the two alignment strings reported by DynamicProgramming.</returns>
public ResultTable.Result Align_And_Extract(Tuple<int,int> cell, GeneSequence sequenceA, GeneSequence sequenceB, bool banded)
{
    ResultTable.Result result = new ResultTable.Result();

    // Keep at most MaxCharactersToAlign leading characters of each sequence.
    int keepA = Math.Min(sequenceA.Sequence.Length, MaxCharactersToAlign);
    string prefixA = sequenceA.Sequence.Substring(0, keepA);

    int keepB = Math.Min(sequenceB.Sequence.Length, MaxCharactersToAlign);
    string prefixB = sequenceB.Sequence.Substring(0, keepB);

    DynamicProgramming solver = new DynamicProgramming(cell, prefixA, prefixB, banded);

    int score = solver.getScore();
    string alignedA = solver.getResultA();
    string alignedB = solver.getResultB();

    result.Update(score, alignedA, alignedB);
    return result;
}
/// <summary>
/// Builds the cost and back-pointer matrices for two sequences, runs either the banded
/// or the unrestricted algorithm over them, and extracts the alignment.
/// </summary>
/// <param name="sequenceA">the first sequence</param>
/// <param name="sequenceB">the second sequence; its length may differ from the first.</param>
/// <param name="banded">true if alignment should be band limited.</param>
/// <returns>a Result holding the score from the bottom-right matrix cell and the two
/// alignment strings, each cut to at most 100 characters.</returns>
public ResultTable.Result Align_And_Extract(GeneSequence sequenceA, GeneSequence sequenceB, bool banded)
{
    const int displayLimit = 100;

    ResultTable.Result result = new ResultTable.Result();
    string[] aligned = { "", "" };

    // Matrix dimensions are (characters + 1), capped per mode.
    // NOTE(review): the banded cap is 15001 (15000 + 1) but the unbanded cap is
    // MaxCharactersToAlign without a "+ 1" -- possible off-by-one; confirm against the
    // definition of MaxCharactersToAlign.
    int cap = banded ? 15001 : MaxCharactersToAlign;
    int rows = Math.Min(cap, sequenceA.Sequence.Length + 1);
    int cols = Math.Min(cap, sequenceB.Sequence.Length + 1);

    int[,] matrix = new int[rows, cols];
    int[,] prev = new int[rows, cols];
    initializeMatrices(matrix, prev, rows, cols);

    if (banded)
    {
        bandedAlg(matrix, prev, rows, cols, sequenceA, sequenceB);
    }
    else
    {
        unrestricted(matrix, prev, rows, cols, sequenceA, sequenceB);
    }

    // The alignment score lives in the bottom-right cell.
    int score = matrix[rows - 1, cols - 1];
    findAlignments(aligned, prev, rows, cols, score, sequenceA.Sequence, sequenceB.Sequence);

    // Only the first 100 characters of each alignment are reported.
    for (int k = 0; k < aligned.Length; k++)
    {
        if (aligned[k].Length > displayLimit)
        {
            aligned[k] = aligned[k].Substring(0, displayLimit);
        }
    }

    result.Update(score, aligned[0], aligned[1]);
    return result;
}
/// <summary>
/// Aligns two gene sequences with the Algo helper and returns the score and alignment strings.
/// </summary>
/// <param name="sequenceA">the first sequence</param>
/// <param name="sequenceB">the second sequence; its length may differ from the first.</param>
/// <param name="banded">true if alignment should be band limited. Not read by this method.</param>
/// <returns>a Result holding the score and the row/column alignment strings reported by Algo.</returns>
public ResultTable.Result Align_And_Extract(GeneSequence sequenceA, GeneSequence sequenceB, bool banded)
{
    ResultTable.Result result = new ResultTable.Result();

    // NOTE(review): the literal `false` and 5000 are passed instead of `banded` and a
    // named limit -- presumably these are the band flag and the character cap; confirm
    // against Algo's constructor before relying on the `banded` argument.
    Algo algo = new Algo(sequenceA, sequenceB, false, 5000);

    // The strings are requested only after both steps have run.
    algo.RunAlgo();
    algo.CalcStrings();

    int score = algo.GetScore();
    string rowText = algo.GetRowString();
    string colText = algo.GetColString();

    result.Update(score, rowText, colText);
    return result;
}
/// <summary>
/// Truncates both sequences to MaxCharactersToAlign characters, aligns them through
/// DynamicProgramming, and bundles the outcome.
/// </summary>
/// <param name="cell">forwarded unchanged to DynamicProgramming; its meaning is not visible here.</param>
/// <param name="sequenceA">the first sequence</param>
/// <param name="sequenceB">the second sequence; its length may differ from the first.</param>
/// <param name="banded">true if alignment should be band limited.</param>
/// <returns>a Result with the score and the two alignment strings produced by DynamicProgramming.</returns>
public ResultTable.Result Align_And_Extract(Tuple <int, int> cell, GeneSequence sequenceA, GeneSequence sequenceB, bool banded)
{
    ResultTable.Result result = new ResultTable.Result();

    // Length of the leading slice taken from each sequence: the whole sequence when it
    // is shorter than the limit, otherwise exactly the limit.
    int sliceA = MaxCharactersToAlign > sequenceA.Sequence.Length
        ? sequenceA.Sequence.Length
        : MaxCharactersToAlign;
    string headA = sequenceA.Sequence.Substring(0, sliceA);

    int sliceB = MaxCharactersToAlign > sequenceB.Sequence.Length
        ? sequenceB.Sequence.Length
        : MaxCharactersToAlign;
    string headB = sequenceB.Sequence.Substring(0, sliceB);

    DynamicProgramming dp = new DynamicProgramming(cell, headA, headB, banded);

    int score = dp.getScore();
    string firstAlignment = dp.getResultA();
    string secondAlignment = dp.getResultB();

    result.Update(score, firstAlignment, secondAlignment);
    return result;
}
/// <summary>
/// Computes the cost of cell (row, col) from its three neighbours and records in
/// <paramref name="prev"/> which neighbour it came from.
/// </summary>
/// <param name="matrix">cost matrix; the cells above, left and diagonal of (row, col) are read.</param>
/// <param name="prev">back-pointer matrix; entry (row, col) is set to DIAG, UP or LEFT.</param>
/// <param name="row">row of the cell; sequenceA is indexed at row - 1.</param>
/// <param name="col">column of the cell; sequenceB is indexed at col - 1.</param>
/// <param name="sequenceA">sequence compared along the rows.</param>
/// <param name="sequenceB">sequence compared along the columns.</param>
/// <returns>the smallest of the three candidate costs.</returns>
private int computeVal(int[,] matrix, int[,] prev, int row, int col, GeneSequence sequenceA, GeneSequence sequenceB)
{
    const int indelCost = 5;

    // A match costs -3, a substitution costs 1.
    bool sameLetter = sequenceA.Sequence[row - 1] == sequenceB.Sequence[col - 1];
    int stepCost = sameLetter ? -3 : 1;

    int viaDiagonal = matrix[row - 1, col - 1] + stepCost;
    int viaUp = matrix[row - 1, col] + indelCost;
    int viaLeft = matrix[row, col - 1] + indelCost;

    // Ties are resolved in the order diagonal, up, left.
    if (viaDiagonal <= viaUp && viaDiagonal <= viaLeft)
    {
        prev[row, col] = DIAG;
        return viaDiagonal;
    }

    // The diagonal did not win, so whenever up <= left holds, up is also strictly
    // smaller than the diagonal; the single comparison is sufficient.
    if (viaUp <= viaLeft)
    {
        prev[row, col] = UP;
        return viaUp;
    }

    prev[row, col] = LEFT;
    return viaLeft;
}
/// <summary>
/// Aligns two gene sequences with MyAligner and returns the cost and both aligned strings.
/// </summary>
/// <param name="sequenceA">the first sequence</param>
/// <param name="sequenceB">the second sequence; its length may differ from the first.</param>
/// <param name="banded">true if alignment should be band limited; forwarded to MyAligner.</param>
/// <returns>a Result holding the cost and the two aligned sequences reported by MyAligner.</returns>
public ResultTable.Result Align_And_Extract(GeneSequence sequenceA, GeneSequence sequenceB, bool banded)
{
    ResultTable.Result result = new ResultTable.Result();

    MyAligner worker = new MyAligner(sequenceA.Sequence, sequenceB.Sequence, banded, MaxCharactersToAlign);
    worker.ExecuteAlignment();

    // Results are read back only after ExecuteAlignment has run.
    int cost = worker.GetCost();
    string alignedA = worker.GetAlignedSequenceA();
    string alignedB = worker.GetAlignedSequenceB();

    result.Update(cost, alignedA, alignedB);
    return result;
}
/// <summary>
/// Reads up to <paramref name="max"/> gene sequences from the DNA table.
/// </summary>
/// <param name="max">upper bound on the number of sequences to return.</param>
/// <returns>an array sized to the smaller of <paramref name="max"/> and MAX(ID) of the
/// DNA table. It is empty when the table has no rows.</returns>
public GeneSequence[] ReadGeneSequences(int max)
{
    GeneSequence[] result;
    try
    {
        m_accessConn.Open();

        // MAX(ID) is used as the sequence count.
        // NOTE(review): if IDs have gaps this over-counts and trailing entries of the
        // returned array stay null -- confirm whether COUNT(*) was intended.
        int sequenceCount;
        using (OleDbCommand countSequencesCommand = new OleDbCommand("SELECT MAX(ID) FROM DNA", m_accessConn))
        {
            // An aggregate over an empty table yields DBNull, which cannot be cast to int.
            object maxId = countSequencesCommand.ExecuteScalar();
            sequenceCount = (maxId == null || maxId is System.DBNull) ? 0 : (int)maxId;
        }

        result = new GeneSequence[sequenceCount < max ? sequenceCount : max];
        if (result.Length == 0)
        {
            // Nothing to fetch; skip issuing a "SELECT TOP 0" query.
            return result;
        }

        // Command and reader are disposed deterministically so the reader is closed
        // before the connection is.
        using (OleDbCommand selectCommand = new OleDbCommand("SELECT TOP " + result.Length + " * FROM DNA ", m_accessConn))
        using (OleDbDataReader reader = selectCommand.ExecuteReader())
        {
            for (int i = 0; i < result.Length && reader.Read(); ++i)
            {
                // Columns 1 and 2 are passed to the GeneSequence constructor
                // (presumably name and sequence -- column 0 would then be the ID).
                result[i] = new GeneSequence(reader.GetString(1), reader.GetString(2));
            }
        }
    }
    finally
    {
        m_accessConn.Close();
    }
    return result;
}
/// <summary>
/// Loads at most <paramref name="max"/> gene sequences from the DNA table.
/// </summary>
/// <param name="max">the largest number of sequences the caller wants.</param>
/// <returns>an array whose length is the smaller of <paramref name="max"/> and the
/// table's MAX(ID). It is empty when the table has no rows.</returns>
public GeneSequence[] ReadGeneSequences(int max)
{
    GeneSequence[] result;
    try
    {
        m_accessConn.Open();

        // Find the number of problems; MAX(ID) stands in for the row count.
        // NOTE(review): gaps in the ID column make this larger than the real row count,
        // leaving null entries at the end of the array -- verify the intent.
        int sequenceCount = 0;
        using (OleDbCommand countSequencesCommand = new OleDbCommand("SELECT MAX(ID) FROM DNA", m_accessConn))
        {
            object scalar = countSequencesCommand.ExecuteScalar();

            // MAX over an empty table comes back as DBNull; treat that as zero rows
            // instead of failing the int cast.
            if (scalar != null && !(scalar is System.DBNull))
            {
                sequenceCount = (int)scalar;
            }
        }

        if (sequenceCount < max)
        {
            result = new GeneSequence[sequenceCount];
        }
        else
        {
            result = new GeneSequence[max];
        }

        // Only query when at least one row is wanted (avoids "SELECT TOP 0").
        if (result.Length > 0)
        {
            // Dispose the command and reader so the reader is closed before the connection.
            using (OleDbCommand selectCommand = new OleDbCommand("SELECT TOP " + result.Length + " * FROM DNA ", m_accessConn))
            using (OleDbDataReader reader = selectCommand.ExecuteReader())
            {
                int filled = 0;
                while (filled < result.Length && reader.Read())
                {
                    // Columns 1 and 2 feed the GeneSequence constructor
                    // (presumably name and sequence).
                    result[filled] = new GeneSequence(reader.GetString(1), reader.GetString(2));
                    ++filled;
                }
            }
        }
    }
    finally
    {
        m_accessConn.Close();
    }
    return result;
}
/// <summary>
/// Inserts one gene sequence (name and sequence text) into the DNA table.
/// </summary>
/// <param name="geneSequence">the sequence to store; its Name and Sequence are written.</param>
public void WriteGeneSequence(GeneSequence geneSequence)
{
    try
    {
        // Parameters are used because the sequence text can be very long.
        const string insertCommandString = "INSERT INTO DNA (Name, Sequence) VALUES (?, ?)";

        // The command is disposed deterministically once the insert has run.
        using (OleDbCommand insertCommand = new OleDbCommand(insertCommandString, m_accessConn))
        {
            // OLE DB binds "?" placeholders by position, so the order of these two
            // Add calls must match the column order in the INSERT statement.
            insertCommand.Parameters.Add(new OleDbParameter("name", geneSequence.Name));
            insertCommand.Parameters.Add(new OleDbParameter("sequence", geneSequence.Sequence));

            m_accessConn.Open();
            insertCommand.ExecuteNonQuery();
        }
    }
    finally
    {
        // Always release the connection, whether or not the insert succeeded.
        m_accessConn.Close();
    }
}
/// <summary>
/// Loads NUMBER_OF_SEQUENCES gene sequences from a text file holding one
/// "first#second" record per line.
/// </summary>
/// <param name="fileName">path of the file to read.</param>
/// <returns>an array of NUMBER_OF_SEQUENCES sequences, or null when reading the file
/// content fails (an error line is written to the console in that case).</returns>
private GeneSequence[] loadFile(string fileName)
{
    string input;

    // `using` guarantees the reader is closed on every path.
    using (StreamReader reader = new StreamReader(fileName))
    {
        try
        {
            input = reader.ReadToEnd();
        }
        catch
        {
            // Best-effort behaviour kept as is: report and signal failure with null.
            Console.WriteLine("Error Parsing File...");
            return null;
        }
    }

    // Split on both CR and LF so CRLF, LF-only and CR-only files all yield one record
    // per entry; blank lines are skipped.
    string[] inputLines = input.Split(new char[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);

    GeneSequence[] temp = new GeneSequence[NUMBER_OF_SEQUENCES];
    for (int i = 0; i < NUMBER_OF_SEQUENCES; i++)
    {
        // The two '#'-separated fields are passed to the GeneSequence constructor in order.
        string[] line = inputLines[i].Split('#');
        temp[i] = new GeneSequence(line[0], line[1]);
    }
    return temp;
}
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////// Banded Algorithm //////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/**
 * Aligns the two sequences with dynamic programming, visiting only cells whose row and
 * column differ by at most the `distance` field (a band 2 * distance + 1 cells wide).
 *
 * Costs: insert/delete = 5, substitution = 1, match = -3.
 *
 * Work done here: each row visits at most 2 * distance + 1 columns, so the fill loop is
 * proportional to (rows * band width). The helpers fillStartCells and createAlignments
 * are not counted.
 * Memory allocated here: two (n + 1) x (m + 1) tables, where n and m are the sequence
 * lengths after capping at MaxCharactersToAlign.
 *
 * When the bottom-right cell is never reached, score is set to int.MaxValue and both
 * alignment strings to "No Alignment Possible".
 */
void bandedAlgorithm(ref int score, ref string[] alignment, ref GeneSequence sequenceA, ref GeneSequence sequenceB)
{
    // Cap both sequences at the maximum number of characters to align.
    int lenA = Math.Min(sequenceA.Sequence.Length, MaxCharactersToAlign);
    int lenB = Math.Min(sequenceB.Sequence.Length, MaxCharactersToAlign);

    // Cost table and back-pointer table.
    int[,] values = new int[lenA + 1, lenB + 1];
    directions[,] prev = new directions[lenA + 1, lenB + 1];

    // Seed the first row and column (the final `true` selects the banded variant).
    fillStartCells(ref values, ref prev, lenA, lenB, true);

    bool reachedCorner = false;
    int firstColumn = 1;

    for (int r = 1; r <= lenA; r++)
    {
        // Stop a row as soon as the column moves past the right edge of the band.
        for (int c = firstColumn; c <= lenB && c <= distance + r; c++)
        {
            // The cell above lies outside the band when this cell is on the band's right edge.
            int fromTop = values[r - 1, c] + 5;
            if (distance + r == c)
            {
                fromTop = int.MaxValue;
            }

            // The cell to the left lies outside the band when this cell is on the band's left edge.
            int fromLeft = values[r, c - 1] + 5;
            if (distance + c == r)
            {
                fromLeft = int.MaxValue;
            }

            // The diagonal move costs -3 on a match and 1 on a substitution.
            int stepCost = (sequenceA.Sequence[r - 1] == sequenceB.Sequence[c - 1]) ? -3 : 1;
            int fromDiagonal = values[r - 1, c - 1] + stepCost;

            int best = Math.Min(fromDiagonal, Math.Min(fromLeft, fromTop));
            values[r, c] = best;

            // Ties are resolved in the order diagonal, left, top.
            prev[r, c] = (best == fromDiagonal)
                ? directions.DIAGONAL
                : (best == fromLeft ? directions.LEFT : directions.TOP);

            if (r == lenA && c == lenB)
            {
                reachedCorner = true;
            }
        }

        // Once past the first `distance` rows the band's left edge moves right by one per row.
        if (r > distance)
        {
            firstColumn++;
        }
    }

    if (!reachedCorner)
    {
        // The bottom-right cell is outside the band: no alignment exists.
        score = int.MaxValue;
        alignment[0] = "No Alignment Possible";
        alignment[1] = "No Alignment Possible";
        return;
    }

    // The score is the value of the last cell; then rebuild the aligned strings.
    score = values[lenA, lenB];
    createAlignments(ref alignment, ref prev, ref sequenceA, ref sequenceB, ref lenA, ref lenB);
}
/// <summary>
/// Prepares the working state for a row-by-row alignment: the two character arrays and
/// the starting cost row.
/// </summary>
/// <param name="sequenceA">sequence whose formatted characters are stored in charA.</param>
/// <param name="sequenceB">sequence whose formatted characters are stored in charB.</param>
private void initialize(GeneSequence sequenceA, GeneSequence sequenceB)
{
    // formatSequence receives the MaxCharactersToAlign limit; presumably it truncates
    // the sequence to that many characters -- confirm against formatSequence.
    charA = formatSequence(sequenceA.Sequence, MaxCharactersToAlign).ToCharArray();
    charB = formatSequence(sequenceB.Sequence, MaxCharactersToAlign).ToCharArray();

    // One entry per character of charB plus the leading empty-prefix column.
    resultRow = new int[charB.Length + 1];

    // Entry k starts at the cost of k insert/delete operations.
    int k = 0;
    while (k < resultRow.Length)
    {
        resultRow[k] = k * INDEL;
        k++;
    }
}
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////// Unrestricted Algorithm ////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/**
 * Aligns the two sequences with dynamic programming over the full cost table.
 *
 * Costs: insert/delete = 5, substitution = 1, match = -3.
 *
 * Work done here: every cell of the n x m table is visited once, where n and m are the
 * sequence lengths after capping at MaxCharactersToAlign. The helpers fillStartCells
 * and createAlignments are not counted.
 * Memory allocated here: two (n + 1) x (m + 1) tables.
 */
void unrestrictedAlgorithm (ref int score, ref string[] alignment, ref GeneSequence sequenceA, ref GeneSequence sequenceB)
{
    // Cap both sequences at the maximum number of characters to align.
    int lenA = Math.Min(sequenceA.Sequence.Length, MaxCharactersToAlign);
    int lenB = Math.Min(sequenceB.Sequence.Length, MaxCharactersToAlign);

    // Cost table and back-pointer table.
    int[,] values = new int[lenA + 1, lenB + 1];
    directions[,] prev = new directions[lenA + 1, lenB + 1];

    // Seed the first row and column (the final `false` selects the unrestricted variant).
    fillStartCells(ref values, ref prev, lenA, lenB, false);

    for (int r = 1; r <= lenA; r++)
    {
        for (int c = 1; c <= lenB; c++)
        {
            int fromTop = values[r - 1, c] + 5;
            int fromLeft = values[r, c - 1] + 5;

            // The diagonal move costs -3 on a match and 1 on a substitution.
            bool lettersMatch = sequenceA.Sequence[r - 1] == sequenceB.Sequence[c - 1];
            int fromDiagonal = values[r - 1, c - 1] + (lettersMatch ? -3 : 1);

            int best = Math.Min(Math.Min(fromDiagonal, fromLeft), fromTop);
            values[r, c] = best;

            // Ties are resolved in the order diagonal, left, top.
            directions origin;
            if (best == fromDiagonal)
            {
                origin = directions.DIAGONAL;
            }
            else if (best == fromLeft)
            {
                origin = directions.LEFT;
            }
            else
            {
                origin = directions.TOP;
            }
            prev[r, c] = origin;
        }
    }

    // The score is the value of the last cell; then rebuild the aligned strings.
    score = values[lenA, lenB];
    createAlignments(ref alignment, ref prev, ref sequenceA, ref sequenceB, ref lenA, ref lenB);
}
/// <summary>
/// Placeholder scoring: returns the absolute difference between the two sequence lengths.
/// </summary>
/// <param name="sequenceA">the first sequence</param>
/// <param name="sequenceB">the second sequence; its length may differ from the first.</param>
/// <param name="resultTableSoFar">the table of pair-wise alignment results generated so far (not read here).</param>
/// <param name="rowInTable">row of the result table this alignment will occupy (not read here).</param>
/// <param name="columnInTable">column of the result table this alignment will occupy (not read here).</param>
/// <returns>|length(A) - length(B)|. The caller stores it at (rowInTable, columnInTable)
/// of the ResultTable.</returns>
public int Align(GeneSequence sequenceA, GeneSequence sequenceB, ResultTable resultTableSoFar, int rowInTable, int columnInTable)
{
    // No real alignment is performed yet; only the lengths are compared.
    int lengthA = sequenceA.Sequence.Length;
    int lengthB = sequenceB.Sequence.Length;
    return Math.Abs(lengthA - lengthB);
}
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary>
/// Aligns two gene sequences using either the banded or the unrestricted algorithm and
/// bundles the score and alignment strings.
/// </summary>
/// <param name="sequenceA">the first sequence</param>
/// <param name="sequenceB">the second sequence; its length may differ from the first.</param>
/// <param name="banded">true if alignment should be band limited.</param>
/// <returns>a Result holding the score and the two alignment strings filled in by the
/// selected algorithm.</returns>
public ResultTable.Result Align_And_Extract(GeneSequence sequenceA, GeneSequence sequenceB, bool banded)
{
    ResultTable.Result result = new ResultTable.Result();

    // Defaults that the selected algorithm overwrites through its ref parameters.
    int score = 0;
    string[] alignment = { "", "" };

    if (banded)
    {
        bandedAlgorithm(ref score, ref alignment, ref sequenceA, ref sequenceB);
    }
    else
    {
        unrestrictedAlgorithm(ref score, ref alignment, ref sequenceA, ref sequenceB);
    }

    result.Update(score, alignment[0], alignment[1]);
    return result;
}