Example #1
        /// <summary>
        /// Loads the genome database and creates the result table, reporting progress in the status message.
        /// </summary>
        /// <remarks>
        /// The genome file is looked up under "../../", then "../", then the current directory.
        /// The last attempt is not guarded, so an exception thrown by it propagates to the caller.
        /// </remarks>
        public MainForm()
        {
            // NOTE(review): no InitializeComponent() call is visible in this listing; confirm the
            // controls used below (statusMessage, dataGridViewResults) exist before this point.
            statusMessage.Text = "Loading Database...";

            // load database here
            try
            {
                m_sequences = loadFile("../../" + GENOME_FILE);
            }
            catch (FileNotFoundException)
            {
                try // Failed, try one level down...
                {
                    m_sequences = loadFile("../" + GENOME_FILE);
                }
                catch (FileNotFoundException)
                {
                    // Failed, try same folder
                    m_sequences = loadFile(GENOME_FILE);
                }
            }

            m_resultTable = new ResultTable(this.dataGridViewResults, NUMBER_OF_SEQUENCES);

            statusMessage.Text = "Loaded Database.";
        }
Example #2
        /// <summary>
        /// Loads the genome database and creates the result table, reporting progress in the status message.
        /// </summary>
        /// <remarks>
        /// The genome file is looked up under "../../", then "../", then the current directory.
        /// The last attempt is not guarded, so an exception thrown by it propagates to the caller.
        /// </remarks>
        public MainForm()
        {
            // NOTE(review): no InitializeComponent() call is visible in this listing; confirm the
            // controls used below (statusMessage, dataGridViewResults) exist before this point.
            statusMessage.Text = "Loading Database...";

            // load database here
            try
            {
                m_sequences = loadFile("../../" + GENOME_FILE);
            }
            catch (FileNotFoundException)
            {
                try // Failed, try one level down...
                {
                    m_sequences = loadFile("../" + GENOME_FILE);
                }
                catch (FileNotFoundException)
                {
                    // Failed, try same folder
                    m_sequences = loadFile(GENOME_FILE);
                }
            }

            m_resultTable = new ResultTable(this.dataGridViewResults, NUMBER_OF_SEQUENCES);

            statusMessage.Text = "Loaded Database.";
        }
        /// <summary>
        /// Computes an alignment score for two sequences by filling a row of cell costs for each value of i.
        /// </summary>
        /// <param name="sequenceA">the first sequence</param>
        /// <param name="sequenceB">the second sequence, may have length not equal to the length of the first seq.</param>
        /// <param name="resultTableSoFar">the table of alignment results that has been generated so far using pair-wise alignment (not referenced in the visible body)</param>
        /// <param name="rowInTable">this particular alignment problem will occupy a cell in this row of the result table (not referenced in the visible body).</param>
        /// <param name="columnInTable">this particular alignment will occupy a cell in this column of the result table (not referenced in the visible body).</param>
        /// <returns>the last entry of prev, cast to int.  The calling function places the result in entry rowInTable,columnInTable
        /// of the ResultTable</returns>
        public int Score(GeneSequence sequenceA, GeneSequence sequenceB, ResultTable resultTableSoFar, int rowInTable, int columnInTable)
            // NOTE(review): brace lines (and possibly other statements) appear to be missing from this
            // listing; the nesting below is inferred from indentation only.
            // presumably sets up X, Y, width, height, prev and results -- verify against initialize()
            initialize(sequenceA, sequenceB);

            for (int i = 1; i < height; i++)
                for (int j = 1; j < width; j++)
                    // cost of the diagonal move: match cost when the characters are equal, substitution cost otherwise
                    int diagCellCost = 0;
                    if (X[j - 1] == Y[i - 1])
                        diagCellCost = CharsMatchCost;
                        // NOTE(review): an "else" presumably preceded this line; as listed it always overwrites the match cost -- confirm
                        diagCellCost = SubstitutionCost;

                    // candidate costs for reaching this cell from the cell above, to the left, and diagonally
                    double topCell = prev[j] + InsertDeleteCost;
                    double leftCell = results[j - 1] + InsertDeleteCost;
                    double diagCell = prev[j - 1] + diagCellCost;

                    double min = Math.Min(topCell, Math.Min(diagCell, leftCell));

                    results[j] = min;


            // NOTE(review): no statement that moves results into prev between rows is visible above,
            // yet the score is read from prev -- confirm against the full source
            return (int)prev[width - 1];
        /// <summary>
        /// Computes the alignment score for a pair of sequences by delegating to a <c>Grid</c>.
        /// </summary>
        /// <param name="sequenceA">the first sequence</param>
        /// <param name="sequenceB">the second sequence, may have length not equal to the length of the first seq.</param>
        /// <param name="resultTableSoFar">the table of alignment results that has been generated so far using pair-wise alignment (not used here)</param>
        /// <param name="rowInTable">this particular alignment problem will occupy a cell in this row of the result table.</param>
        /// <param name="columnInTable">this particular alignment will occupy a cell in this column of the result table.</param>
        /// <returns>0 when columnInTable is less than rowInTable; otherwise the value returned by
        /// <c>Grid.CalculateScoreSolution()</c>.  The calling function places the result in entry rowInTable,columnInTable
        /// of the ResultTable</returns>
        public int Align(GeneSequence sequenceA, GeneSequence sequenceB, ResultTable resultTableSoFar, int rowInTable, int columnInTable)
        {
            // Cells whose column index is smaller than their row index are not computed.
            if (columnInTable < rowInTable)
            {
                return 0;
            }

            Grid grid = new Grid(sequenceA.Sequence, sequenceB.Sequence, true, MaxCharactersToAlign);
            return grid.CalculateScoreSolution();
        }
Example #5
        /// <summary>
        /// Computes an alignment score for two sequences using two rolling rows of costs.
        /// </summary>
        /// <param name="sequenceA">the first sequence</param>
        /// <param name="sequenceB">the second sequence, may have length not equal to the length of the first seq.</param>
        /// <param name="resultTableSoFar">the table of alignment results that has been generated so far using pair-wise alignment (not referenced in the visible body)</param>
        /// <param name="rowInTable">this particular alignment problem will occupy a cell in this row of the result table.</param>
        /// <param name="columnInTable">this particular alignment will occupy a cell in this column of the result table.</param>
        /// <returns>the alignment score for sequenceA and sequenceB.  The calling function places the result in entry rowInTable,columnInTable
        /// of the ResultTable</returns>
        public int Align(GeneSequence sequenceA, GeneSequence sequenceB, ResultTable resultTableSoFar, int rowInTable, int columnInTable)
            // NOTE(review): brace lines, "else" lines and return statements appear to be missing from
            // this listing; the nesting below is inferred from indentation only.
            // Only fill in above the diagonal
            // NOTE(review): the body of this check is not visible -- confirm what it returns
            if (rowInTable <= columnInTable)

            string a = sequenceA.Sequence;
            string b = sequenceB.Sequence;
            // compare at most MaxCharactersToAlign characters of each sequence
            int    m = Math.Min(a.Length, MaxCharactersToAlign);
            int    n = Math.Min(b.Length, MaxCharactersToAlign);

            // two rows only: E[previous] is the finished row, E[active] is the row being filled
            int[][] E = new int[2][];
            E[0] = new int[MaxCharactersToAlign + 1];
            E[1] = new int[MaxCharactersToAlign + 1];

            // Initialize first row with cost of indels
            for (int j = 0; j <= n; j++)
                E[0][j] = 5 * j;

            int previous = 0;
            int active   = 1;

            for (int i = 1; i <= m; i++)
                for (int j = 0; j <= n; j++)
                    // cost of arriving from the cell above (an indel, cost 5)
                    var indels = E[previous][j] + 5;

                    if (j == 0)
                        // If first element, only have one option
                        E[active][j] = indels;
                        // NOTE(review): an "else" presumably separated the line above from the lines below
                        // (j - 1 would be -1 when j == 0) -- confirm
                        // Get the minimum cost from the two available indels and the diagonal match/sub
                        indels = Math.Min(E[active][j - 1] + 5, indels);
                        // diagonal move: -3 when the characters match, 1 otherwise
                        var diff = (a[i - 1] == b[j - 1]) ? -3 : 1;
                        E[active][j] = Math.Min(indels, E[previous][j - 1] + diff);

                // Swap active and previous (new active will get overwritten)
                active   = (active == 0) ? 1 : 0;
                previous = (previous == 0) ? 1 : 0;

            // Return last element in last filled row
            // NOTE(review): no return statement is visible here -- confirm against the full source
Example #6
 /// <summary>
 /// Creates the database controller, loads the gene sequences, and sets up the result table and the aligner.
 /// </summary>
 public MainForm()
 {
     // Set the number of Sequences to load here.
     const int SequencesToLoad = 10;

     // NOTE(review): no InitializeComponent() call is visible in this listing; confirm the
     // controls used below (statusMessage, dataGridViewResults) exist before this point.
     m_dbController = new DatabaseController();
     statusMessage.Text = "Loading Database...";
     m_sequences = m_dbController.ReadGeneSequences(SequencesToLoad);

     // The result table is sized from the number of sequences actually returned.
     m_resultTable = new ResultTable(this.dataGridViewResults, m_sequences.Length);
     statusMessage.Text = "Loaded Database.";
     processor = new PairWiseAlign();
 }
Example #7
 /// <summary>
 /// Creates the database controller, loads the gene sequences, and sets up the result table and the aligner.
 /// </summary>
 public MainForm()
 {
     // Set the number of Sequences to load here.
     const int SequencesToLoad = 10;

     // NOTE(review): no InitializeComponent() call is visible in this listing; confirm the
     // controls used below (statusMessage, dataGridViewResults) exist before this point.
     m_dbController = new DatabaseController();
     statusMessage.Text = "Loading Database...";
     m_sequences        = m_dbController.ReadGeneSequences(SequencesToLoad);

     // The result table is sized from the number of sequences actually returned.
     m_resultTable      = new ResultTable(this.dataGridViewResults, m_sequences.Length);
     statusMessage.Text = "Loaded Database.";
     processor          = new PairWiseAlign();
 }
Example #8
        /// <summary>
        /// Loads the genome database, trying three locations in turn, and builds the result table
        /// from the names of the loaded sequences.
        /// </summary>
        public MainForm()
            // NOTE(review): brace lines and some keywords (the outer "try", the innermost "catch",
            // possibly an "else") appear to be missing from this listing; nesting is inferred from indentation.
            // set when the genome file cannot be found in any of the locations tried
            bool failed = false;


            statusMessage.Text = "Loading Database...";

            // load database here

                m_sequences = loadFile("../../" + GENOME_FILE);
            catch (FileNotFoundException)
                try // Failed, try one level down...
                    m_sequences = loadFile("../" + GENOME_FILE);
                catch (FileNotFoundException)
                    try // Failed, try same folder
                        m_sequences = loadFile(GENOME_FILE);
                        // NOTE(review): the two lines below presumably sit inside a catch for the last attempt -- confirm
                        statusMessage.Text = "Failed to load database: " + GENOME_FILE + " not found.";
                        failed = true;

            if (!failed)
                // collect the names of the first NUMBER_OF_SEQUENCES sequences and pass them to the result table
                string [] names = new string[NUMBER_OF_SEQUENCES];

                for (int i = 0; i < NUMBER_OF_SEQUENCES; i++)
                    names[i] = m_sequences[i].Name;
                m_resultTable      = new ResultTable(this.dataGridViewResults, names);
                statusMessage.Text = "Loaded Database.";
                // NOTE(review): unclear whether this line belongs to the success path or to a missing "else" branch -- confirm
                processButton.Enabled = false;
Example #9
        /// <summary>
        /// Loads the genome database, trying three locations in turn, and builds the result table
        /// from the names of the loaded sequences.
        /// </summary>
        public MainForm()
            // NOTE(review): brace lines and some keywords (the outer "try", the innermost "catch",
            // possibly an "else") appear to be missing from this listing; nesting is inferred from indentation.
            // set when the genome file cannot be found in any of the locations tried
            bool failed = false;


            statusMessage.Text = "Loading Database...";

            // load database here

                m_sequences = loadFile("../../" + GENOME_FILE);
            catch (FileNotFoundException)
                try // Failed, try one level down...
                    m_sequences = loadFile("../" + GENOME_FILE);
                catch (FileNotFoundException)
                    try // Failed, try same folder
                        m_sequences = loadFile(GENOME_FILE);
                        // NOTE(review): the two lines below presumably sit inside a catch for the last attempt -- confirm
                        statusMessage.Text = "Failed to load database: " + GENOME_FILE + " not found.";
                        failed = true;

            if (!failed)
                // collect the names of the first NUMBER_OF_SEQUENCES sequences and pass them to the result table
                string [] names = new string[NUMBER_OF_SEQUENCES];

                for (int i = 0; i < NUMBER_OF_SEQUENCES; i++)
                    names[i] = m_sequences[i].Name;
                m_resultTable = new ResultTable(this.dataGridViewResults, names);
                statusMessage.Text = "Loaded Database.";
                // NOTE(review): unclear whether this line belongs to the success path or to a missing "else" branch -- confirm
                processButton.Enabled = false;
        //Dictionary<string, int> previouslyCalculatedValues = new Dictionary<string, int>();
        /// <summary>
        /// Computes the alignment score of two sequences by building the cost table one row at a time.
        /// </summary>
        /// <param name="sequenceA">the first sequence</param>
        /// <param name="sequenceB">the second sequence, may have length not equal to the length of the first seq.</param>
        /// <param name="resultTableSoFar">the table of alignment results that has been generated so far using pair-wise alignment (not used here)</param>
        /// <param name="rowInTable">this particular alignment problem will occupy a cell in this row of the result table (not used here).</param>
        /// <param name="columnInTable">this particular alignment will occupy a cell in this column of the result table (not used here).</param>
        /// <returns>the last entry of the final row.  The calling function places the result in entry rowInTable,columnInTable
        /// of the ResultTable</returns>
        public int Align(GeneSequence sequenceA, GeneSequence sequenceB, ResultTable resultTableSoFar, int rowInTable, int columnInTable)
        {
            // set up algorithm
            // presumably fills charA, charB and the initial resultRow -- verify against initializeSequencing()
            initializeSequencing(sequenceA, sequenceB);

            // calculate each additional row: one createNextRow call per element of charA;
            // only the most recent row is kept between iterations
            for (int i = 0; i < charA.Length; i++)
            {
                resultRow = createNextRow(resultRow, charA[i], charB);
            }

            // return score
            return resultRow[resultRow.Length - 1];
        }
        /// <summary>
        /// Aligns two sequences and returns the node for the last cell of the cost table,
        /// keeping only the previous and the current row in memory.
        /// </summary>
        /// <param name="sequenceA">the first sequence</param>
        /// <param name="sequenceB">the second sequence, may have length not equal to the length of the first seq.</param>
        /// <param name="resultTableSoFar">the table of alignment results that has been generated so far using pair-wise alignment (not used here)</param>
        /// <param name="rowInTable">this particular alignment problem will occupy a cell in this row of the result table.</param>
        /// <param name="columnInTable">this particular alignment will occupy a cell in this column of the result table.</param>
        /// <returns>a node with cost 0 when rowInTable equals columnInTable; otherwise the node computed for the
        /// last column of the last row</returns>
        public GeneNode Align(GeneSequence sequenceA, GeneSequence sequenceB, ResultTable resultTableSoFar, int rowInTable, int columnInTable)
        {
            // Check for equal alignment
            if (rowInTable == columnInTable)
            {
                return new GeneNode(null, '0', '0', 0);
            }

            // New Sequences: the prepended "0" makes index 0 the row/column for the empty prefix
            string seqA = "0" + sequenceA.Sequence;
            string seqB = "0" + sequenceB.Sequence;

            // Cap the lengths at MaxCharactersToAlign characters (plus the prepended one)
            int seqALength = seqA.Length;
            int seqBLength = seqB.Length;

            if (seqALength > MaxCharactersToAlign + 1)
            {
                seqALength = MaxCharactersToAlign + 1;
            }
            if (seqBLength > MaxCharactersToAlign + 1)
            {
                seqBLength = MaxCharactersToAlign + 1;
            }

            // Linear space requirement
            var resultSet1 = new List<GeneNode>();           // Prev Row
            var resultSet2 = new List<GeneNode>(seqBLength); // Current Row

            // Core alignment algorithm
            for (int i = 0; i < seqALength; i++)
            {
                for (int j = 0; j < seqBLength; j++)
                {
                    GeneNode acc;

                    if (i == 0 && j == 0)
                    {
                        // Starting position
                        acc = new GeneNode(null, '0', '0', 0);
                    }
                    else if (i == 0)
                    {
                        // First row: get the node from the left cell, and add as indel
                        acc = new GeneNode(resultSet2[j - 1], '-', seqB[j], (resultSet2[j - 1].Cost + indel));
                    }
                    else if (j == 0)
                    {
                        // First column: get the node from the top cell, and add as indel
                        acc = new GeneNode(resultSet1[j], seqA[i], '-', (resultSet1[j].Cost + indel));
                    }
                    else
                    {
                        // Interior cell: the diagonal move is a match or a substitution
                        var diagCost = (seqA[i] == seqB[j]) ? match : sub;

                        var top  = new GeneNode(resultSet1[j], seqA[i], '-', (resultSet1[j].Cost + indel));
                        var left = new GeneNode(resultSet2[j - 1], '-', seqB[j], (resultSet2[j - 1].Cost + indel));
                        var diag = new GeneNode(resultSet1[j - 1], seqA[i], seqB[j], (resultSet1[j - 1].Cost + diagCost));

                        // Get node neighbor with smallest cost in order of: left, top, diag
                        acc = GetSmallest(left, top, diag);
                    }

                    // Add to lower row (current row) results; later cells in this row read it as resultSet2[j - 1]
                    resultSet2.Add(acc);
                }

                // Make lower row the new upper row and start a fresh lower row
                resultSet1 = resultSet2;
                resultSet2 = new List<GeneNode>(seqBLength);
            }

            // return the node at furthest (col, row) from origin
            return resultSet1[resultSet1.Count - 1];
        }
        /// <summary>
        /// Computes an alignment cost for two sequences using a previous-row / current-row pair of lists,
        /// and records the result in a cache under both (row, column) and (column, row).
        /// </summary>
        /// <param name="sequenceA">the first sequence</param>
        /// <param name="sequenceB">the second sequence, may have length not equal to the length of the first seq.</param>
        /// <param name="resultTableSoFar">the table of alignment results that has been generated so far using pair-wise alignment (not referenced in the visible body)</param>
        /// <param name="rowInTable">this particular alignment problem will occupy a cell in this row of the result table.</param>
        /// <param name="columnInTable">this particular alignment will occupy a cell in this column of the result table.</param>
        /// <returns>the alignment score for sequenceA and sequenceB.  The calling function places the result in entry rowInTable,columnInTable
        /// of the ResultTable</returns>
        public int Align(GeneSequence sequenceA, GeneSequence sequenceB, ResultTable resultTableSoFar, int rowInTable, int columnInTable)
            // NOTE(review): brace lines and several statements (the bodies of the two checks below, the
            // row append, the final return) appear to be missing from this listing; nesting is inferred
            // from indentation only.
            // Check for equal alignment
            // NOTE(review): body not visible -- confirm what is returned for the diagonal
            if (rowInTable == columnInTable)

            // Check Cache
            // NOTE(review): body not visible -- presumably returns the cached value; confirm
            if (InCache(rowInTable, columnInTable))

            // Linear space requirement
            resultSet1 = new List <int>(); // Prev Row
            resultSet2 = new List <int>(); // Current Row

            // New Sequences: the prepended "0" makes index 0 the row/column for the empty prefix
            string seqA = "0" + sequenceA.Sequence;
            string seqB = "0" + sequenceB.Sequence;

            // Cap the lengths at MaxCharactersToAlign characters (plus the prepended one)
            int seqALength = seqA.Length;
            int seqBLength = seqB.Length;

            if (seqA.Length > MaxCharactersToAlign + 1)
                seqALength = MaxCharactersToAlign + 1;
            if (seqB.Length > MaxCharactersToAlign + 1)
                seqBLength = MaxCharactersToAlign + 1;

            // Core iteration
            for (int i = 0; i < seqALength; i++)
                for (int j = 0; j < seqBLength; j++)
                    int cost = 0;
                    // First cell, no cost value
                    if (i == 0 && j == 0)
                        cost = 0;
                    // Get the cost from the left cell, and add as indel
                    else if (i == 0 && j > 0)
                        cost = resultSet2[j - 1] + indel;
                    // Get the cost from the top cell, and add as indel
                    else if (i > 0 && j == 0)
                        cost = resultSet1[j] + indel;
                    // Interior cell whose characters are equal: the diagonal move uses the match cost
                    else if ((i > 0 && j > 0) && (seqA[i] == seqB[j]))
                        // Match or indel
                        int top  = resultSet1[j] + indel;
                        int left = resultSet2[j - 1] + indel;
                        int diag = resultSet1[j - 1] + match;
                        // Get smallest costs in order of: top, left, diag
                        cost = GetSmallest(top, left, diag);
                    // Interior cell whose characters differ: the diagonal move uses the substitution cost
                    else if ((i > 0 && j > 0) && (seqA[i] != seqB[j]))
                        // Sub or indel
                        int top  = resultSet1[j] + indel;
                        int left = resultSet2[j - 1] + indel;
                        int diag = resultSet1[j - 1] + sub;
                        // Get smallest costs in order of: top, left, diag
                        cost = GetSmallest(top, left, diag);
                    // Add to the lower row.. the result set.
                    // NOTE(review): no statement appending cost to resultSet2 is visible here -- confirm

                // Make the lower row the new upper row
                resultSet1 = resultSet2;
                // Clear the old lower row
                resultSet2 = new List <int>();

            // Get result: the last entry of the last completed row
            int result = resultSet1[resultSet1.Count - 1];

            // Add to the cache under both index orders
            AddToCache(rowInTable, columnInTable, result);
            AddToCache(columnInTable, rowInTable, result);

            // Return the new calculated cost
            // NOTE(review): no return statement is visible here -- confirm against the full source
Example #13
 /// <summary>
 /// Placeholder alignment: returns the absolute difference between the lengths of the two sequences.
 /// </summary>
 /// <param name="sequenceA">the first sequence</param>
 /// <param name="sequenceB">the second sequence, may have length not equal to the length of the first seq.</param>
 /// <param name="resultTableSoFar">the table of alignment results that has been generated so far using pair-wise alignment (not used here)</param>
 /// <param name="rowInTable">this particular alignment problem will occupy a cell in this row of the result table (not used here).</param>
 /// <param name="columnInTable">this particular alignment will occupy a cell in this column of the result table (not used here).</param>
 /// <returns>the absolute length difference of sequenceA and sequenceB.  The calling function places the result in entry rowInTable,columnInTable
 /// of the ResultTable</returns>
 public int Align(GeneSequence sequenceA, GeneSequence sequenceB, ResultTable resultTableSoFar, int rowInTable, int columnInTable)
 {
     // a place holder computation.  You'll want to implement your code here.
     return Math.Abs(sequenceA.Sequence.Length - sequenceB.Sequence.Length);
 }
Example #14
        /// <summary>
        /// Computes the alignment score of two sequences with a two-row cost table:
        /// -3 for a match, 1 for a substitution and 5 for an insertion/deletion.
        /// </summary>
        /// <param name="sequenceA">the first sequence</param>
        /// <param name="sequenceB">the second sequence, may have length not equal to the length of the first seq.</param>
        /// <param name="resultTableSoFar">the table of alignment results that has been generated so far using pair-wise alignment (not used here)</param>
        /// <param name="rowInTable">this particular alignment problem will occupy a cell in this row of the result table (not used here).</param>
        /// <param name="columnInTable">this particular alignment will occupy a cell in this column of the result table (not used here).</param>
        /// <returns>the alignment score for the first 5000 characters of sequenceA and sequenceB.  The calling function places the result in entry rowInTable,columnInTable
        /// of the ResultTable</returns>
        public int Align(GeneSequence sequenceA, GeneSequence sequenceB, ResultTable resultTableSoFar, int rowInTable, int columnInTable)
        {
            const int MaxLength = 5000;
            const int IndelCost = 5;
            const int MatchCost = -3;
            const int SubstitutionCost = 1;

            //limit the number of characters in each string to 5000
            string a = sequenceA.Sequence.Length > MaxLength
                ? sequenceA.Sequence.Substring(0, MaxLength)
                : sequenceA.Sequence;

            string b = sequenceB.Sequence.Length > MaxLength
                ? sequenceB.Sequence.Substring(0, MaxLength)
                : sequenceB.Sequence;

            //top Array begins as the top row of the table: i indels to reach column i
            int[] topArray = new int[a.Length + 1];
            for (int i = 0; i < topArray.Length; i++)
            {
                topArray[i] = i * IndelCost;
            }

            //bottom Array is where calculated scores are put
            int[] bottomArray = new int[a.Length + 1];

            int rowCount = 1;

            //outer loop pulls out one character of b at a time, represented by one row in the table
            foreach (char letter in b)
            {
                //the first column is rowCount indels
                bottomArray[0] = rowCount * IndelCost;

                //inner loop iterates through each column and calculates its score
                for (int i = 1; i <= a.Length; i++)
                {
                    //whether or not the characters in the row/col match
                    int diff = (letter == a[i - 1]) ? MatchCost : SubstitutionCost;

                    //the score algorithm based on dynamic programming and Needleman-Wunsch:
                    //candidates are the diagonal, top and left neighbours
                    bottomArray[i] = scoreMin(diff + topArray[i - 1], IndelCost + topArray[i], IndelCost + bottomArray[i - 1]);
                }

                //the references to the arrays are switched so the next row can be calculated based on
                //the row above it.  Bottom Array will be written over in the next loop
                int[] tempArray = topArray;
                topArray    = bottomArray;
                bottomArray = tempArray;

                //advance to the next row so the first column keeps growing by one indel per row
                rowCount++;
            }

            //At this point the last index in topArray has the alignment score
            return topArray[topArray.Length - 1];
        }
Example #15
 /// <summary>
 /// Placeholder alignment: returns the absolute difference between the lengths of the two sequences.
 /// </summary>
 /// <param name="sequenceA">the first sequence</param>
 /// <param name="sequenceB">the second sequence, may have length not equal to the length of the first seq.</param>
 /// <param name="resultTableSoFar">the table of alignment results that has been generated so far using pair-wise alignment (not used here)</param>
 /// <param name="rowInTable">this particular alignment problem will occupy a cell in this row of the result table (not used here).</param>
 /// <param name="columnInTable">this particular alignment will occupy a cell in this column of the result table (not used here).</param>
 /// <returns>the absolute length difference of sequenceA and sequenceB.  The calling function places the result in entry rowInTable,columnInTable
 /// of the ResultTable</returns>
 public int Align(GeneSequence sequenceA, GeneSequence sequenceB, ResultTable resultTableSoFar, int rowInTable, int columnInTable)
 {
     // a place holder computation.  You'll want to implement your code here.
     return Math.Abs(sequenceA.Sequence.Length - sequenceB.Sequence.Length);
 }