/// <summary>
/// Builds the form and loads the genome sequences, looking for GENOME_FILE two directory
/// levels up, then one level up, and finally in the current directory.
/// </summary>
/// <remarks>
/// A <see cref="FileNotFoundException"/> raised by the last (current directory) attempt is
/// not caught here and propagates to the caller.
/// </remarks>
public MainForm()
{
    InitializeComponent();

    statusMessage.Text = "Loading Database...";

    // Try the parent directories first; only a missing file moves on to the next candidate.
    bool loaded = false;
    foreach (string prefix in new[] { "../../", "../" })
    {
        try
        {
            m_sequences = loadFile(prefix + GENOME_FILE);
            loaded = true;
            break;
        }
        catch (FileNotFoundException)
        {
            // Not at this location; continue with the next candidate.
        }
    }

    if (!loaded)
    {
        // Last candidate: the same folder. A failure here is not handled.
        m_sequences = loadFile(GENOME_FILE);
    }

    m_resultTable = new ResultTable(this.dataGridViewResults, NUMBER_OF_SEQUENCES);
    statusMessage.Text = "Loaded Database.";
}
/// <summary>
/// Scores the alignment of two sequences by filling a cost table one row at a time,
/// keeping only the previous row and the row being written.
/// </summary>
/// <param name="sequenceA">the first sequence</param>
/// <param name="sequenceB">the second sequence; its length may differ from that of the first</param>
/// <param name="resultTableSoFar">the table of alignment results generated so far (not read by this method)</param>
/// <param name="rowInTable">row of the result-table cell this alignment belongs to (not read by this method)</param>
/// <param name="columnInTable">column of the result-table cell this alignment belongs to (not read by this method)</param>
/// <returns>the last entry of the final row, truncated to an int</returns>
public int Score(GeneSequence sequenceA, GeneSequence sequenceB, ResultTable resultTableSoFar, int rowInTable, int columnInTable)
{
    // NOTE(review): initialize is defined elsewhere; presumably it prepares X, Y, width,
    // height, prev and results for this pair — confirm.
    initialize(sequenceA, sequenceB);

    for (int row = 1; row < height; row++)
    {
        for (int col = 1; col < width; col++)
        {
            // Diagonal move: match cost when the characters agree, substitution cost otherwise.
            int diagonalStep = (X[col - 1] == Y[row - 1]) ? CharsMatchCost : SubstitutionCost;

            double fromAbove = prev[col] + InsertDeleteCost;
            double fromLeft = results[col - 1] + InsertDeleteCost;
            double fromDiagonal = prev[col - 1] + diagonalStep;

            // Keep the cheapest of the three ways of reaching this cell.
            results[col] = Math.Min(fromAbove, Math.Min(fromDiagonal, fromLeft));
        }

        currentRow++;

        // NOTE(review): SwapArrays is defined elsewhere; the final read from prev below
        // suggests it exchanges prev and results — confirm.
        SwapArrays();
    }

    return (int)prev[width - 1];
}
/// <summary>
/// Scores the alignment of two sequences by delegating to a <c>Grid</c>.
/// </summary>
/// <param name="sequenceA">the first sequence</param>
/// <param name="sequenceB">the second sequence; its length may differ from that of the first</param>
/// <param name="resultTableSoFar">the table of alignment results generated so far (not read by this method)</param>
/// <param name="rowInTable">row of the result-table cell this alignment belongs to</param>
/// <param name="columnInTable">column of the result-table cell this alignment belongs to</param>
/// <returns>
/// 0 when columnInTable minus rowInTable is negative; otherwise the value returned by
/// <c>Grid.CalculateScoreSolution</c>.
/// </returns>
public int Align(GeneSequence sequenceA, GeneSequence sequenceB, ResultTable resultTableSoFar, int rowInTable, int columnInTable)
{
    // Cells whose column index is smaller than their row index are not computed.
    int columnsPastDiagonal = columnInTable - rowInTable;
    if (columnsPastDiagonal < 0)
    {
        return 0;
    }

    // NOTE(review): the meaning of the boolean argument is defined by Grid, which is not
    // visible here — confirm.
    Grid alignmentGrid = new Grid(sequenceA.Sequence, sequenceB.Sequence, true, MaxCharactersToAlign);
    int score = alignmentGrid.CalculateScoreSolution();
    return score;
}
/// <summary>
/// Computes the minimum alignment cost of two sequences with a two-row dynamic-programming
/// table: an insertion or deletion costs 5, a matching pair costs -3 and a substitution costs 1.
/// </summary>
/// <param name="sequenceA">the first sequence</param>
/// <param name="sequenceB">the second sequence; its length may differ from that of the first</param>
/// <param name="resultTableSoFar">the table of alignment results generated so far (not read by this method)</param>
/// <param name="rowInTable">row of the result-table cell this alignment belongs to</param>
/// <param name="columnInTable">column of the result-table cell this alignment belongs to</param>
/// <returns>
/// 0 when rowInTable is less than or equal to columnInTable; otherwise the cost of aligning
/// the first MaxCharactersToAlign characters (at most) of each sequence.
/// </returns>
public int Align(GeneSequence sequenceA, GeneSequence sequenceB, ResultTable resultTableSoFar, int rowInTable, int columnInTable)
{
    // Costs used by the recurrence below.
    const int indelCost = 5;
    const int matchCost = -3;
    const int substitutionCost = 1;

    // Cells on the diagonal or with a larger column index than row index are skipped.
    // NOTE(review): the earlier comment here read "Only fill in above the diagonal", which
    // does not match this test — confirm which half of the table the caller expects.
    if (rowInTable <= columnInTable)
    {
        return 0;
    }

    string a = sequenceA.Sequence;
    string b = sequenceB.Sequence;
    int m = Math.Min(a.Length, MaxCharactersToAlign);
    int n = Math.Min(b.Length, MaxCharactersToAlign);

    // Only indices 0..n are ever touched, so size the two rows to n + 1 cells
    // instead of always allocating MaxCharactersToAlign + 1.
    int[] previousRow = new int[n + 1];
    int[] activeRow = new int[n + 1];

    // Row 0: aligning the empty prefix of a against b[0..j) takes j insertions.
    for (int j = 0; j <= n; j++)
    {
        previousRow[j] = indelCost * j;
    }

    for (int i = 1; i <= m; i++)
    {
        // Column 0 can only be reached from the cell directly above.
        activeRow[0] = previousRow[0] + indelCost;

        for (int j = 1; j <= n; j++)
        {
            // Cheapest of the two indel moves (from the left and from above)...
            int viaIndel = Math.Min(activeRow[j - 1] + indelCost, previousRow[j] + indelCost);

            // ...compared against the diagonal match/substitution move.
            int diagonalStep = (a[i - 1] == b[j - 1]) ? matchCost : substitutionCost;
            activeRow[j] = Math.Min(viaIndel, previousRow[j - 1] + diagonalStep);
        }

        // The row just written becomes the previous row; the old one is fully overwritten next pass.
        int[] swap = previousRow;
        previousRow = activeRow;
        activeRow = swap;
    }

    // After the final swap the last filled row is previousRow.
    return previousRow[n];
}
/// <summary>
/// Builds the form, connects to the database file, reads the gene sequences, and creates
/// the result table and the pairwise aligner.
/// </summary>
public MainForm()
{
    // Database file to open and the number of sequences to read from it.
    const string databasePath = "../../db1.mdb";
    const int sequencesToLoad = 10;

    InitializeComponent();

    m_dbController = new DatabaseController();
    m_dbController.EstablishConnection(databasePath);

    statusMessage.Text = "Loading Database...";
    m_sequences = m_dbController.ReadGeneSequences(sequencesToLoad);

    // The result table is sized from the number of sequences actually returned.
    m_resultTable = new ResultTable(this.dataGridViewResults, m_sequences.Length);
    statusMessage.Text = "Loaded Database.";

    processor = new PairWiseAlign();
}
/// <summary>
/// Builds the form and loads the genome sequences, looking for GENOME_FILE two directory
/// levels up, then one level up, and finally in the current directory.
/// </summary>
/// <remarks>
/// If the last attempt fails, the failure is shown in the status message and the process
/// button is disabled; no result table is created in that case. Exceptions other than
/// <see cref="FileNotFoundException"/> raised by the first two attempts are not caught.
/// </remarks>
public MainForm()
{
    InitializeComponent();

    statusMessage.Text = "Loading Database...";

    bool loaded = false;
    try
    {
        m_sequences = loadFile("../../" + GENOME_FILE);
        loaded = true;
    }
    catch (FileNotFoundException)
    {
        try
        {
            // Failed, try one level down.
            m_sequences = loadFile("../" + GENOME_FILE);
            loaded = true;
        }
        catch (FileNotFoundException)
        {
            try
            {
                // Failed, try the same folder.
                m_sequences = loadFile(GENOME_FILE);
                loaded = true;
            }
            catch (FileNotFoundException)
            {
                statusMessage.Text = "Failed to load database: " + GENOME_FILE + " not found.";
            }
            catch (System.Exception ex)
            {
                // Any other failure is still handled here, but is no longer reported as
                // "not found": show what actually went wrong.
                statusMessage.Text = "Failed to load database: " + ex.Message;
            }
        }
    }

    if (!loaded)
    {
        Refresh();
        processButton.Enabled = false;
        return;
    }

    // Label the result table with the names of the first NUMBER_OF_SEQUENCES sequences.
    string[] names = new string[NUMBER_OF_SEQUENCES];
    for (int i = 0; i < NUMBER_OF_SEQUENCES; i++)
    {
        names[i] = m_sequences[i].Name;
    }

    m_resultTable = new ResultTable(this.dataGridViewResults, names);
    statusMessage.Text = "Loaded Database.";
}
/// <summary>
/// Scores the alignment of two sequences by building the cost table one row at a time
/// and returning the last entry of the final row.
/// </summary>
/// <param name="sequenceA">the first sequence</param>
/// <param name="sequenceB">the second sequence; its length may differ from that of the first</param>
/// <param name="resultTableSoFar">the table of alignment results generated so far (not read by this method)</param>
/// <param name="rowInTable">row of the result-table cell this alignment belongs to (not read by this method)</param>
/// <param name="columnInTable">column of the result-table cell this alignment belongs to (not read by this method)</param>
/// <returns>the last element of resultRow after every element of charA has been processed</returns>
public int Align(GeneSequence sequenceA, GeneSequence sequenceB, ResultTable resultTableSoFar, int rowInTable, int columnInTable)
{
    // NOTE(review): initializeSequencing is defined elsewhere; presumably it fills charA,
    // charB and the starting resultRow for this pair — confirm.
    initializeSequencing(sequenceA, sequenceB);

    // One call to createNextRow per element of charA; each call replaces resultRow.
    int position = 0;
    while (position < charA.Length)
    {
        resultRow = createNextRow(resultRow, charA[position], charB);
        position++;
    }

    // The score is the last entry of the last row produced.
    var lastIndex = resultRow.Length - 1;
    return resultRow[lastIndex];
}
/// <summary>
/// Aligns two sequences row by row, keeping only two rows of nodes, and returns the node
/// stored in the last cell of the last row.
/// </summary>
/// <param name="sequenceA">the first sequence</param>
/// <param name="sequenceB">the second sequence; its length may differ from that of the first</param>
/// <param name="resultTableSoFar">the table of alignment results generated so far (not read by this method)</param>
/// <param name="rowInTable">row of the result-table cell this alignment belongs to</param>
/// <param name="columnInTable">column of the result-table cell this alignment belongs to</param>
/// <returns>
/// a fresh zero-cost node when rowInTable equals columnInTable; otherwise the node for the
/// final cell of the table
/// </returns>
public GeneNode Align(GeneSequence sequenceA, GeneSequence sequenceB, ResultTable resultTableSoFar, int rowInTable, int columnInTable)
{
    // Diagonal cell: nothing to compute.
    if (rowInTable == columnInTable)
    {
        return new GeneNode(null, '0', '0', 0);
    }

    // Only the row above and the row being filled are kept.
    var rowAbove = new List<GeneNode>();
    var rowInProgress = new List<GeneNode>();

    // Prefix each sequence with a placeholder character so that index 0 is the origin cell.
    string seqA = "0" + sequenceA.Sequence;
    string seqB = "0" + sequenceB.Sequence;

    // Cap both dimensions at MaxCharactersToAlign characters plus the placeholder.
    int cap = MaxCharactersToAlign + 1;
    int rowCount = seqA.Length > cap ? cap : seqA.Length;
    int columnCount = seqB.Length > cap ? cap : seqB.Length;

    for (int i = 0; i < rowCount; i++)
    {
        for (int j = 0; j < columnCount; j++)
        {
            GeneNode cell;

            if (i == 0)
            {
                if (j == 0)
                {
                    // Origin cell.
                    cell = new GeneNode(null, '0', '0', 0);
                }
                else
                {
                    // First row: extend the cell on the left with an indel.
                    cell = new GeneNode(rowInProgress[j - 1], '-', seqB[j], (rowInProgress[j - 1].Cost + indel));
                }
            }
            else if (j == 0)
            {
                // First column: extend the cell above with an indel.
                cell = new GeneNode(rowAbove[0], seqA[i], '-', (rowAbove[0].Cost + indel));
            }
            else
            {
                // Interior cell: the two indel candidates are the same whether or not the
                // characters agree; only the cost added on the diagonal differs.
                var top = new GeneNode(rowAbove[j], seqA[i], '-', (rowAbove[j].Cost + indel));
                var left = new GeneNode(rowInProgress[j - 1], '-', seqB[j], (rowInProgress[j - 1].Cost + indel));

                GeneNode diag;
                if (seqA[i] == seqB[j])
                {
                    diag = new GeneNode(rowAbove[j - 1], seqA[i], seqB[j], (rowAbove[j - 1].Cost + match));
                }
                else
                {
                    diag = new GeneNode(rowAbove[j - 1], seqA[i], seqB[j], (rowAbove[j - 1].Cost + sub));
                }

                // GetSmallest is defined elsewhere; candidates are passed as left, top, diag.
                cell = GetSmallest(left, top, diag);
            }

            rowInProgress.Add(cell);
        }

        // The finished row becomes the row above; start a new empty row.
        rowAbove = rowInProgress;
        rowInProgress = new List<GeneNode>();
    }

    // Last cell of the last completed row.
    return rowAbove[rowAbove.Count - 1];
}
/// <summary>
/// Scores the alignment of two sequences row by row, keeping only two rows of costs, and
/// records the result in the cache for both (row, column) and (column, row).
/// </summary>
/// <param name="sequenceA">the first sequence</param>
/// <param name="sequenceB">the second sequence; its length may differ from that of the first</param>
/// <param name="resultTableSoFar">the table of alignment results generated so far (not read by this method)</param>
/// <param name="rowInTable">row of the result-table cell this alignment belongs to</param>
/// <param name="columnInTable">column of the result-table cell this alignment belongs to</param>
/// <returns>
/// 0 when rowInTable equals columnInTable; the cached value when InCache reports one;
/// otherwise the cost stored in the final cell of the table
/// </returns>
public int Align(GeneSequence sequenceA, GeneSequence sequenceB, ResultTable resultTableSoFar, int rowInTable, int columnInTable)
{
    // Diagonal cell: nothing to compute.
    if (rowInTable == columnInTable)
    {
        return 0;
    }

    // Reuse an earlier result for this pair when one is cached.
    if (InCache(rowInTable, columnInTable))
    {
        return cache[rowInTable][columnInTable];
    }

    // resultSet1 holds the row above, resultSet2 the row being filled.
    resultSet1 = new List<int>();
    resultSet2 = new List<int>();

    // Prefix each sequence with a placeholder character so that index 0 is the origin cell.
    string seqA = "0" + sequenceA.Sequence;
    string seqB = "0" + sequenceB.Sequence;

    // Cap both dimensions at MaxCharactersToAlign characters plus the placeholder.
    int cap = MaxCharactersToAlign + 1;
    int rowCount = seqA.Length > cap ? cap : seqA.Length;
    int columnCount = seqB.Length > cap ? cap : seqB.Length;

    for (int i = 0; i < rowCount; i++)
    {
        for (int j = 0; j < columnCount; j++)
        {
            int cost;

            if (i == 0)
            {
                if (j == 0)
                {
                    // Origin cell carries no cost.
                    cost = 0;
                }
                else
                {
                    // First row: cell on the left plus an indel.
                    cost = resultSet2[j - 1] + indel;
                }
            }
            else if (j == 0)
            {
                // First column: cell above plus an indel.
                cost = resultSet1[0] + indel;
            }
            else
            {
                // Interior cell: the indel candidates are the same in both cases; only
                // the amount added on the diagonal depends on whether the characters agree.
                int top = resultSet1[j] + indel;
                int left = resultSet2[j - 1] + indel;

                int diag;
                if (seqA[i] == seqB[j])
                {
                    diag = resultSet1[j - 1] + match;
                }
                else
                {
                    diag = resultSet1[j - 1] + sub;
                }

                // GetSmallest is defined elsewhere; candidates are passed as top, left, diag.
                cost = GetSmallest(top, left, diag);
            }

            resultSet2.Add(cost);
        }

        // The finished row becomes the row above; start a new empty row.
        resultSet1 = resultSet2;
        resultSet2 = new List<int>();
    }

    // Last cell of the last completed row.
    int result = resultSet1[resultSet1.Count - 1];

    // Store the result under both index orders.
    AddToCache(rowInTable, columnInTable, result);
    AddToCache(columnInTable, rowInTable, result);

    return result;
}
/// <summary>
/// Placeholder scoring: returns the absolute difference between the lengths of the two
/// sequences. No alignment is performed.
/// </summary>
/// <param name="sequenceA">the first sequence</param>
/// <param name="sequenceB">the second sequence; its length may differ from that of the first</param>
/// <param name="resultTableSoFar">the table of alignment results generated so far (not read by this method)</param>
/// <param name="rowInTable">row of the result-table cell this alignment belongs to (not read by this method)</param>
/// <param name="columnInTable">column of the result-table cell this alignment belongs to (not read by this method)</param>
/// <returns>the absolute value of sequenceA's length minus sequenceB's length</returns>
public int Align(GeneSequence sequenceA, GeneSequence sequenceB, ResultTable resultTableSoFar, int rowInTable, int columnInTable)
{
    // Stand-in computation until a real alignment is implemented here.
    var lengthA = sequenceA.Sequence.Length;
    var lengthB = sequenceB.Sequence.Length;

    return Math.Abs(lengthA - lengthB);
}
/// <summary>
/// Scores the alignment of two sequences with a two-row cost table: an insertion or
/// deletion adds 5, a matching pair adds -3 and a mismatching pair adds 1.
/// </summary>
/// <param name="sequenceA">the first sequence; only its first 5000 characters are used</param>
/// <param name="sequenceB">the second sequence; only its first 5000 characters are used</param>
/// <param name="resultTableSoFar">the table of alignment results generated so far (not read by this method)</param>
/// <param name="rowInTable">row of the result-table cell this alignment belongs to (not read by this method)</param>
/// <param name="columnInTable">column of the result-table cell this alignment belongs to (not read by this method)</param>
/// <returns>the value in the last column of the last row computed</returns>
public int Align(GeneSequence sequenceA, GeneSequence sequenceB, ResultTable resultTableSoFar, int rowInTable, int columnInTable)
{
    // Work on at most the first 5000 characters of each sequence.
    string a = sequenceA.Sequence.Length > 5000
        ? sequenceA.Sequence.Substring(0, 5000)
        : sequenceA.Sequence;
    string b = sequenceB.Sequence.Length > 5000
        ? sequenceB.Sequence.Substring(0, 5000)
        : sequenceB.Sequence;

    int columns = a.Length + 1;

    // Top row of the table: column c costs c indels.
    int[] rowAbove = new int[columns];
    for (int c = 0; c < columns; c++)
    {
        rowAbove[c] = c * 5;
    }

    // Receives the scores of the row being computed.
    int[] rowBelow = new int[columns];

    // One table row per character of b, one cell per character of a within each row.
    for (int r = 0; r < b.Length; r++)
    {
        char letter = b[r];

        // First column: table row r + 1 costs r + 1 indels.
        rowBelow[0] = (r + 1) * 5;

        for (int c = 1; c < columns; c++)
        {
            // Diagonal contribution depends on whether the row and column characters match.
            int diff = (letter == a[c - 1]) ? -3 : 1;

            // scoreMin is defined elsewhere; candidates are passed as diagonal, above, left.
            rowBelow[c] = scoreMin(diff + rowAbove[c - 1], 5 + rowAbove[c], 5 + rowBelow[c - 1]);
        }

        // Exchange the two rows: the row just written is read on the next pass and the
        // other one is overwritten.
        int[] swap = rowAbove;
        rowAbove = rowBelow;
        rowBelow = swap;
    }

    // After the last exchange the most recent row is rowAbove.
    return rowAbove[a.Length];
}
/// <summary>
/// Placeholder scoring: returns how far apart the two sequence lengths are. No alignment
/// is performed.
/// </summary>
/// <param name="sequenceA">the first sequence</param>
/// <param name="sequenceB">the second sequence; its length may differ from that of the first</param>
/// <param name="resultTableSoFar">the table of alignment results generated so far (not read by this method)</param>
/// <param name="rowInTable">row of the result-table cell this alignment belongs to (not read by this method)</param>
/// <param name="columnInTable">column of the result-table cell this alignment belongs to (not read by this method)</param>
/// <returns>the absolute value of sequenceA's length minus sequenceB's length</returns>
public int Align(GeneSequence sequenceA, GeneSequence sequenceB, ResultTable resultTableSoFar, int rowInTable, int columnInTable)
{
    // Stand-in computation until a real alignment is implemented here.
    var lengthGap = sequenceA.Sequence.Length - sequenceB.Sequence.Length;

    return Math.Abs(lengthGap);
}