Esempio n. 1
0
        /// <summary>
        /// Computes the cost of cell [row, col] from its diagonal, upper and left neighbours and records
        /// in <paramref name="prev"/> which of the three neighbours supplied that cost.
        /// </summary>
        /// <param name="matrix">Cost table; the three neighbouring cells are read from it.</param>
        /// <param name="prev">Back-pointer table; entry [row, col] is set to DIAG, UP or LEFT.</param>
        /// <param name="row">Row of the cell; character row - 1 of sequenceA is compared.</param>
        /// <param name="col">Column of the cell; character col - 1 of sequenceB is compared.</param>
        /// <param name="sequenceA">Sequence indexed by row.</param>
        /// <param name="sequenceB">Sequence indexed by column.</param>
        /// <returns>The smallest of the three candidate costs.</returns>
        private int computeVal(int[,] matrix, int[,] prev, int row, int col, GeneSequence sequenceA, GeneSequence sequenceB)
        {
            const int indelCost = 5;

            // The table is offset by one from the sequences, hence the "- 1".
            char fromA = sequenceA.Sequence[row - 1];
            char fromB = sequenceB.Sequence[col - 1];

            // Equal characters contribute -3, different characters contribute 1.
            int stepCost = (fromA == fromB) ? -3 : 1;

            int viaDiagonal = matrix[row - 1, col - 1] + stepCost;
            int viaAbove    = matrix[row - 1, col] + indelCost;
            int viaLeft     = matrix[row, col - 1] + indelCost;

            // Ties are resolved in the order diagonal, above, left.
            if (viaDiagonal <= viaAbove && viaDiagonal <= viaLeft)
            {
                prev[row, col] = DIAG;
                return viaDiagonal;
            }

            if (viaAbove < viaDiagonal && viaAbove <= viaLeft)
            {
                prev[row, col] = UP;
                return viaAbove;
            }

            // Neither of the other two won, so the left neighbour is the cheapest.
            prev[row, col] = LEFT;
            return viaLeft;
        }
Esempio n. 2
0
        /// <summary>
        /// Fills only the cells of <paramref name="matrix"/> that lie at most three positions to the right of
        /// or below the main diagonal, delegating each cell to computeVal.
        /// </summary>
        /// <remarks>
        /// When rows and cols differ by more than three nothing is computed; the bottom-right cell is set to
        /// int.MaxValue instead.
        /// An IndexOutOfRangeException raised while filling is reported on the console and then swallowed, which
        /// leaves the table partially filled.
        /// NOTE(review): this method never writes cells outside the band. If computeVal reads the upper and left
        /// neighbours of a cell, cells on the band edge read such unwritten cells - confirm how the caller
        /// initialises them.
        /// </remarks>
        /// <param name="matrix">Cost table to fill.</param>
        /// <param name="prev">Back-pointer table, passed through to computeVal.</param>
        /// <param name="rows">Exclusive upper bound for the row indices that are filled.</param>
        /// <param name="cols">Exclusive upper bound for the column indices that are filled.</param>
        /// <param name="sequenceA">Passed through to computeVal.</param>
        /// <param name="sequenceB">Passed through to computeVal.</param>
        private void bandedAlg(int[,] matrix, int[,] prev, int rows, int cols, GeneSequence sequenceA, GeneSequence sequenceB)
        {
            if (Math.Abs(rows - cols) > 3)
            {
                matrix[rows - 1, cols - 1] = int.MaxValue;
                return;
            }

            // Declared outside the try block so the handler below can report where the failure happened.
            int i = 0;
            int j = 0;

            try
            {
                int maxD = rows > cols ? rows : cols;
                for (i = 1; i < maxD; i++)
                {
                    for (j = 0; j < 4; j++)
                    {
                        // Cell j positions to the right of the diagonal (j == 0 is the diagonal cell itself).
                        if (i + j < cols && i < rows)
                        {
                            matrix[i, i + j] = computeVal(matrix, prev, i, i + j, sequenceA, sequenceB);
                        }
                        // Cell j positions below the diagonal. At j == 0 this would be the same cell as above,
                        // so it is skipped instead of being computed a second time.
                        if (j > 0 && i + j < rows && i < cols)
                        {
                            matrix[i + j, i] = computeVal(matrix, prev, i + j, i, sequenceA, sequenceB);
                        }
                    }
                }
            }
            catch (IndexOutOfRangeException)
            {
                Console.WriteLine("rows=" + rows + " cols=" + cols);
                Console.WriteLine("i=" + i + " j=" + j);
            }
        }
Esempio n. 3
0
        /// <summary>
        /// Fills the band of <paramref name="matrix"/> around the main diagonal: for every diagonal position
        /// the diagonal cell, up to three cells to its right and up to three cells below it are computed
        /// through computeVal.
        /// </summary>
        /// <remarks>
        /// When rows and cols differ by more than the band width the sequences are treated as not alignable:
        /// nothing is computed and the bottom-right cell is set to int.MaxValue.
        /// NOTE(review): cells outside the band are never written here. If computeVal reads the upper and left
        /// neighbours of a cell, the outermost band cells read such unwritten cells - confirm how the caller
        /// initialises them.
        /// </remarks>
        /// <param name="matrix">Cost table to fill.</param>
        /// <param name="prev">Back-pointer table, passed through to computeVal.</param>
        /// <param name="rows">Exclusive upper bound for the row indices that are filled.</param>
        /// <param name="cols">Exclusive upper bound for the column indices that are filled.</param>
        /// <param name="sequenceA">Passed through to computeVal.</param>
        /// <param name="sequenceB">Passed through to computeVal.</param>
        private void bandedAlg(int[,] matrix, int[,] prev, int rows, int cols, GeneSequence sequenceA, GeneSequence sequenceB)
        {
            // Number of cells kept on each side of the main diagonal.
            const int bandWidth = 3;

            // If the difference is greater than the band width, they cannot be aligned.
            if (Math.Abs(rows - cols) > bandWidth)
            {
                // Set the score cell to int.MaxValue to indicate that they weren't aligned.
                matrix[rows - 1, cols - 1] = int.MaxValue;
                return;
            }

            // Walk as far as the larger dimension so the bottom-right cell is reached in either case.
            int diagonalLength = Math.Max(rows, cols);

            // This loop travels down the diagonal.
            for (int d = 1; d < diagonalLength; d++)
            {
                for (int offset = 0; offset <= bandWidth; offset++)
                {
                    // Cell "offset" positions to the right of the diagonal (offset 0 is the diagonal cell).
                    if (d + offset < cols && d < rows)
                    {
                        matrix[d, d + offset] = computeVal(matrix, prev, d, d + offset, sequenceA, sequenceB);
                    }
                    // Cell "offset" positions below the diagonal. Offset 0 is the diagonal cell that was just
                    // computed, so it is not computed again.
                    if (offset > 0 && d + offset < rows && d < cols)
                    {
                        matrix[d + offset, d] = computeVal(matrix, prev, d + offset, d, sequenceA, sequenceB);
                    }
                }
            }
        }
Esempio n. 4
0
        /// <summary>
        /// Inserts the name and sequence of <paramref name="geneSequence"/> as one row into the DNA table.
        /// </summary>
        /// <remarks>
        /// The connection m_accessConn is opened for the insert and is closed again in every case, including
        /// when the insert fails. Database exceptions are not caught here and reach the caller.
        /// </remarks>
        /// <param name="geneSequence">The sequence to store; it and its Name must not be null.</param>
        /// <exception cref="ArgumentNullException"><paramref name="geneSequence"/> is null.</exception>
        /// <exception cref="ArgumentException">The Name of <paramref name="geneSequence"/> is null.</exception>
        public void WriteGeneSequence(GeneSequence geneSequence)
        {
            try
            {
                // Reject unusable input before the connection is opened.
                if (geneSequence == null)
                {
                    throw new ArgumentNullException("geneSequence");
                }
                if (geneSequence.Name == null)
                {
                    throw new ArgumentException("The gene sequence has no name.", "geneSequence");
                }

                // Because the sequence can be so long, the values are passed as parameters
                // rather than being concatenated into the SQL text.
                const string insertCommandString = "INSERT INTO DNA (Name, Sequence) VALUES (?, ?)";

                // OleDbCommand is disposable; the using block releases it even when the insert throws.
                using (OleDbCommand insertCommand = new OleDbCommand(insertCommandString, m_accessConn))
                {
                    insertCommand.Parameters.Add(new OleDbParameter("name", geneSequence.Name));
                    insertCommand.Parameters.Add(new OleDbParameter("sequence", geneSequence.Sequence));
                    m_accessConn.Open();
                    insertCommand.ExecuteNonQuery();
                }
            }
            finally
            {
                m_accessConn.Close();
            }
        }
        /// <summary>
        /// Computes the alignment score of two sequences row by row, keeping only the previously completed
        /// row (prev) and the row being filled (results).
        /// </summary>
        /// <param name="sequenceA">the first sequence; handed to initialize</param>
        /// <param name="sequenceB">the second sequence; handed to initialize</param>
        /// <param name="resultTableSoFar">not used by this method</param>
        /// <param name="rowInTable">not used by this method</param>
        /// <param name="columnInTable">not used by this method</param>
        /// <returns>the entry at index width - 1 of prev after the last row, truncated to int</returns>
        public int Score(GeneSequence sequenceA, GeneSequence sequenceB, ResultTable resultTableSoFar, int rowInTable, int columnInTable)
        {
            initialize(sequenceA, sequenceB);

            for (int rowIndex = 1; rowIndex < height; rowIndex++)
            {
                for (int colIndex = 1; colIndex < width; colIndex++)
                {
                    // Cost of the diagonal step: a match when the two characters agree, a substitution otherwise.
                    int diagonalStep = SubstitutionCost;
                    if (X[colIndex - 1] == Y[rowIndex - 1])
                    {
                        diagonalStep = CharsMatchCost;
                    }

                    double fromAbove    = prev[colIndex] + InsertDeleteCost;
                    double fromLeft     = results[colIndex - 1] + InsertDeleteCost;
                    double fromDiagonal = prev[colIndex - 1] + diagonalStep;

                    results[colIndex] = Math.Min(fromAbove, Math.Min(fromDiagonal, fromLeft));
                }

                // NOTE(review): SwapArrays presumably exchanges prev and results so that the row just
                // finished becomes prev for the next iteration - confirm against its definition.
                currentRow++;
                SwapArrays();
            }

            return (int)prev[width - 1];
        }
Esempio n. 6
0
        /////////////////////////////////////////////////////////////////////////////////////////////////////////////////
        /////////////////////////////////////////////////////////////////////////////////////////////////////////////////
        /////////////////////////////////////////////////////////////////////////////////////////////////////////////////
        /// <summary>
        /// Aligns two sequences with either the banded or the unrestricted algorithm and bundles the score
        /// and both alignment strings into a result object.
        /// </summary>
        /// <param name="sequenceA">the first sequence</param>
        /// <param name="sequenceB">the second sequence, may have length not equal to the length of the first seq.</param>
        /// <param name="banded">true if alignment should be band limited.</param>
        /// <returns>a Result object updated with the alignment score and the two alignment strings for
        /// sequenceA and sequenceB.</returns>
        public ResultTable.Result Align_And_Extract(GeneSequence sequenceA, GeneSequence sequenceB, bool banded)
        {
            ResultTable.Result result = new ResultTable.Result();

            // Starting values; both are handed to the chosen algorithm by reference so it can replace them.
            int      score     = 0;
            string[] alignment = { "", "" };

            if (banded)
            {
                bandedAlgorithm(ref score, ref alignment, ref sequenceA, ref sequenceB);
            }
            else
            {
                unrestrictedAlgorithm(ref score, ref alignment, ref sequenceA, ref sequenceB);
            }

            // Bundle the results into the object type the caller expects.
            result.Update(score, alignment[0], alignment[1]);
            return result;
        }
Esempio n. 7
0
        /**
         * Rebuilds the two aligned strings by following the back-pointer table from cell
         * [lengthOfSequenceA, lengthOfSequenceB] back to cell [0, 0].
         *
         * A DIAGONAL pointer emits one character of each sequence, a LEFT pointer emits a gap ('-') in the
         * first string and a character of sequenceB, and any other pointer emits a character of sequenceA and
         * a gap in the second string. In row 0 the only possible step is LEFT and in column 0 the only
         * possible step is up, so those steps are taken there regardless of the stored pointer.
         *
         * Each result is limited to its first 100 characters and stored in alignment[0] and alignment[1].
         *
         * Time Complexity: O(n + m), where n and m are the two lengths: every step consumes at least one
         *                  character and the characters are appended, then reversed once at the end.
         * Space Complexity: O(n + m) for the two character buffers.
         */
        void createAlignments(ref string[] alignment, ref directions[,] prev, ref GeneSequence sequenceA, ref GeneSequence sequenceB,
                              ref int lengthOfSequenceA, ref int lengthOfSequenceB)
        {
            int           rowIterator = lengthOfSequenceA, columnIterator = lengthOfSequenceB;
            StringBuilder first = new StringBuilder(), second = new StringBuilder();

            // The walk runs from the end of the alignment to its start, so characters are appended in
            // reverse order here; inserting at index 0 instead would shift the whole buffer on every step.
            while (rowIterator != 0 || columnIterator != 0)
            {
                bool       canMoveUp   = rowIterator > 0;
                bool       canMoveLeft = columnIterator > 0;
                directions pointer     = prev[rowIterator, columnIterator];

                if (canMoveUp && canMoveLeft && pointer == directions.DIAGONAL) // match/sub
                {
                    first.Append(sequenceA.Sequence[rowIterator - 1]);
                    second.Append(sequenceB.Sequence[columnIterator - 1]);
                    rowIterator--;
                    columnIterator--;
                }
                else if (canMoveLeft && (!canMoveUp || pointer == directions.LEFT)) // insert
                {
                    first.Append('-');
                    second.Append(sequenceB.Sequence[columnIterator - 1]);
                    columnIterator--;
                }
                else // delete
                {
                    first.Append(sequenceA.Sequence[rowIterator - 1]);
                    second.Append('-');
                    rowIterator--;
                }
            }

            // Restore front-to-back order.
            char[] firstChars  = first.ToString().ToCharArray();
            char[] secondChars = second.ToString().ToCharArray();
            Array.Reverse(firstChars);
            Array.Reverse(secondChars);

            // Limiting the length of the string to 100 if it exceeds it
            alignment[0] = new string(firstChars, 0, Math.Min(firstChars.Length, 100));
            alignment[1] = new string(secondChars, 0, Math.Min(secondChars.Length, 100));
        }
Esempio n. 8
0
        /// <summary>
        /// Reads the first NUMBER_OF_SEQUENCES non-empty lines of a text file and turns each one into a
        /// GeneSequence. A line consists of two fields separated by '#'; they are passed to the GeneSequence
        /// constructor in that order.
        /// </summary>
        /// <remarks>
        /// Lines may end in "\r\n", "\r" or "\n". Failing to open the file raises the exception of the
        /// StreamReader constructor; a failure while reading is reported on the console and yields null.
        /// </remarks>
        /// <param name="fileName">Path of the file to read.</param>
        /// <returns>The parsed sequences, or null when reading the file content failed.</returns>
        private GeneSequence[] loadFile(string fileName)
        {
            string input;

            // The using block closes the reader on every path, including the early return below.
            using (StreamReader reader = new StreamReader(fileName))
            {
                try
                {
                    input = reader.ReadToEnd();
                }
                catch
                {
                    Console.WriteLine("Error Parsing File...");
                    return(null);
                }
            }

            // Split on both line-ending characters so that files using "\n" only are handled as well;
            // the empty pieces produced by "\r\n" pairs and blank lines are dropped.
            string[]       inputLines = input.Split(new char[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);
            GeneSequence[] temp       = new GeneSequence[NUMBER_OF_SEQUENCES];

            for (int i = 0; i < NUMBER_OF_SEQUENCES; i++)
            {
                string[] line = inputLines[i].Split('#');
                temp[i] = new GeneSequence(line[0], line[1]);
            }
            return(temp);
        }
Esempio n. 9
0
        /// <summary>
        /// Shows the alignment for the clicked result cell: the aligned strings go to the output console and
        /// the names and sequences of the two genes go to the side bar.
        /// </summary>
        /// <param name="sender">The control that raised the event; not used.</param>
        /// <param name="e">Supplies the row and column index of the clicked cell.</param>
        private void dataGridViewResults_CellMouseClick(object sender, DataGridViewCellMouseEventArgs e)
        {
            // Clicks on a header cell report an index of -1; there is no pair of sequences behind those.
            if (e.RowIndex < 0 || e.ColumnIndex < 0)
            {
                return;
            }

            // The column selects sequence A and the row selects sequence B.
            GeneSequence seqA = m_sequences[e.ColumnIndex];
            GeneSequence seqB = m_sequences[e.RowIndex];

            // results[0] and results[1] are shown on the A and B lines, results[2] on the line between them.
            String[] results       = processor.extractSolution(seqA, seqB);
            String   outputMessage = String.Format("Output Console: {0}= MATCH, {1}= SUB, {2}= INDEL",
                                                   processor.MATCH_CHAR, processor.SUB_CHAR, processor.INDEL_CHAR);

            String outputText = String.Format("{0}\r\nGene Alignment for Cell (Row:{1}, Col:{2})\r\nA: {3}\r\n   {4}\r\nB: {5}",
                                              outputMessage,
                                              e.RowIndex + 1,
                                              e.ColumnIndex + 1,
                                              processor.formatSequence(results[0], MaxToDisplay),
                                              processor.formatSequence(results[2], MaxToDisplay),
                                              processor.formatSequence(results[1], MaxToDisplay));

            String sideText = String.Format("\r\n\r\nA: {0}\r\n\r\nB: {1}\r\n\r\nA: {2}\r\n\r\nB: {3}",
                                            seqA.Name,
                                            seqB.Name,
                                            processor.formatSequence(seqA.Sequence, 15),
                                            processor.formatSequence(seqB.Sequence, 15));

            sideBar.Text       = sideText;
            outputConsole.Text = outputText;
        }
Esempio n. 10
0
        /**
        * Creates the alignment strings for both sequences by walking the previous-pointers array from
        * [lengthOfSequenceA, lengthOfSequenceB] back to [0, 0].
        *
        * DIAGONAL emits a character from each sequence, LEFT emits '-' for the first string and a character
        * of sequenceB, every other pointer emits a character of sequenceA and '-' for the second string.
        * Along row 0 the walk can only go left and along column 0 it can only go up, so the stored pointer
        * is not consulted for the direction there.
        *
        * Only the first 100 characters of each string are kept; they are written to alignment[0] and
        * alignment[1].
        *
        * Time Complexity: O(n + m) for sequence lengths n and m - each step consumes at least one character,
        *                  characters are appended and each buffer is reversed once afterwards.
        * Space Complexity: O(n + m) for the two buffers.
        */
        void createAlignments(ref string[] alignment, ref directions[,] prev, ref GeneSequence sequenceA, ref GeneSequence sequenceB,
                                                                ref int lengthOfSequenceA, ref int lengthOfSequenceB)
        {
            int rowIterator = lengthOfSequenceA, columnIterator = lengthOfSequenceB;
            StringBuilder first = new StringBuilder(), second = new StringBuilder();

            // Built back-to-front: appending is cheap, whereas Insert(0, ...) moves the whole buffer each time.
            while (rowIterator != 0 || columnIterator != 0)
            {
                bool atTopRow = rowIterator == 0;
                bool atLeftColumn = columnIterator == 0;
                directions pointer = prev[rowIterator, columnIterator];

                if (!atTopRow && !atLeftColumn && pointer == directions.DIAGONAL) // match/sub
                {
                    first.Append(sequenceA.Sequence[rowIterator - 1]);
                    second.Append(sequenceB.Sequence[columnIterator - 1]);
                    rowIterator--;
                    columnIterator--;
                }
                else if (!atLeftColumn && (atTopRow || pointer == directions.LEFT)) //insert
                {
                    first.Append('-');
                    second.Append(sequenceB.Sequence[columnIterator - 1]);
                    columnIterator--;
                }
                else // delete
                {
                    first.Append(sequenceA.Sequence[rowIterator - 1]);
                    second.Append('-');
                    rowIterator--;
                }
            }

            // Put both buffers into front-to-back order.
            char[] firstChars = first.ToString().ToCharArray();
            Array.Reverse(firstChars);
            char[] secondChars = second.ToString().ToCharArray();
            Array.Reverse(secondChars);

            // Limiting the length of the string to 100 if it exceeds it
            alignment[0] = new string(firstChars, 0, Math.Min(firstChars.Length, 100));
            alignment[1] = new string(secondChars, 0, Math.Min(secondChars.Length, 100));
        }
Esempio n. 11
0
        /// <summary>
        /// Recomputes the full cost table for two sequences and traces it back from the bottom-right cell to
        /// produce the aligned strings.
        /// </summary>
        /// <param name="sequenceA">Sequence whose characters index the rows of the table.</param>
        /// <param name="sequenceB">Sequence whose characters index the columns of the table.</param>
        /// <returns>
        /// Three strings of equal length: [0] the aligned A with '-' for gaps, [1] the aligned B with '-' for
        /// gaps, [2] one marker per position (MATCH_CHAR, SUB_CHAR or INDEL_CHAR).
        /// </returns>
        /// <exception cref="ArgumentException">A cell on the trace is not explained by any of its neighbours.</exception>
        public String[] extractSolution(GeneSequence sequenceA, GeneSequence sequenceB)
        {
            // initialize arrays and strings and first row
            initialize(sequenceA, sequenceB);

            // initialize table to store each row
            List <int[]> resultTable = new List <int[]>(charA.Length + 1);

            resultTable.Add(resultRow);

            // calculate each additional row (and save it for backtrace)
            for (int i = 0; i < charA.Length; i++)
            {
                resultTable.Add(computeNextRow(resultTable[i], charA[i], charB));
            }

            // compute back trace and generate final strings (built in reverse, flipped at the end)
            StringBuilder buildA = new StringBuilder();
            StringBuilder buildB = new StringBuilder();
            StringBuilder buildC = new StringBuilder();
            int           row    = charA.Length;
            int           col    = charB.Length;

            // while index pointers to string a (row) and string b (column) aren't at zero figure out last operation
            while (row != 0 || col != 0)
            {
                int current = resultTable[row][col];

                // A step to the left needs a column to the left; without the col > 0 check the trace would
                // read index -1 as soon as it reaches column 0 before row 0.
                if (col > 0 && current == resultTable[row][col - 1] + INDEL)
                {
                    buildA.Append('-');
                    buildB.Append(charB[--col]);
                    buildC.Append(INDEL_CHAR);
                }
                // Likewise a step up needs a row above.
                else if (row > 0 && current == resultTable[row - 1][col] + INDEL)
                {
                    buildA.Append(charA[--row]);
                    buildB.Append('-');
                    buildC.Append(INDEL_CHAR);
                }
                else if (row > 0 && col > 0 &&
                         (current == resultTable[row - 1][col - 1] + MATCH ||
                          current == resultTable[row - 1][col - 1] + SUB))
                {
                    buildA.Append(charA[--row]);
                    buildB.Append(charB[--col]);
                    // row and col now index the two characters that were just consumed.
                    buildC.Append(charB[col] == charA[row] ? MATCH_CHAR : SUB_CHAR);
                }
                else
                {
                    throw new ArgumentException();
                }
            }

            String[] results = new String[3];
            results[0] = reverseString(buildA.ToString());
            results[1] = reverseString(buildB.ToString());
            results[2] = reverseString(buildC.ToString());
            return(results);
        }
Esempio n. 12
0
        /// <summary>
        /// Scores the alignment of two sequences for one cell of the result table.
        /// </summary>
        /// <param name="sequenceA">the first sequence</param>
        /// <param name="sequenceB">the second sequence, may have length not equal to the length of the first seq.</param>
        /// <param name="resultTableSoFar">not used by this method</param>
        /// <param name="rowInTable">row of the result table that this alignment occupies</param>
        /// <param name="columnInTable">column of the result table that this alignment occupies</param>
        /// <returns>0 for cells whose column index is smaller than their row index; otherwise the value of
        /// CalculateScoreSolution() on a Grid built from the two sequences.</returns>
        public int Align(GeneSequence sequenceA, GeneSequence sequenceB, ResultTable resultTableSoFar, int rowInTable, int columnInTable)
        {
            // Cells with column < row are not computed; they simply report 0.
            int columnsPastDiagonal = columnInTable - rowInTable;

            if (columnsPastDiagonal >= 0)
            {
                Grid grid = new Grid(sequenceA.Sequence, sequenceB.Sequence, true, MaxCharactersToAlign);
                return grid.CalculateScoreSolution();
            }

            return 0;
        }
Esempio n. 13
0
 /// <summary>
 /// Fills every cell of <paramref name="matrix"/> with row and column index of at least 1 by calling
 /// computeVal, one row after the other from left to right. Row 0 and column 0 are not written.
 /// </summary>
 private void unrestricted(int[,] matrix, int[,] prev, int rows, int cols, GeneSequence sequenceA, GeneSequence sequenceB)
 {
     for (int r = 1; r < rows; r++)
     {
         for (int c = 1; c < cols; c++)
         {
             int cellCost = computeVal(matrix, prev, r, c, sequenceA, sequenceB);
             matrix[r, c] = cellCost;
         }
     }
 }
Esempio n. 14
0
        /// <summary>
        /// Aligns the leading part of two sequences (at most MaxCharactersToAlign characters each) with an
        /// EditDistance helper and bundles the score and both alignment strings into a result object.
        /// </summary>
        /// <param name="sequenceA">the first sequence</param>
        /// <param name="sequenceB">the second sequence, may have length not equal to the length of the first seq.</param>
        /// <param name="banded">true if alignment should be band limited.</param>
        /// <returns>a Result object holding the score and the two alignment strings. For a banded alignment
        /// whose two lengths differ by more than Bandwidth it holds int.MaxValue and "No Alignment Possible"
        /// for both strings.</returns>
        public ResultTable.Result Align_And_Extract(GeneSequence sequenceA, GeneSequence sequenceB, bool banded)
        {
            ResultTable.Result result = new ResultTable.Result();

            // Number of leading characters of each sequence that take part in the alignment.
            int lengthA = Math.Min(sequenceA.Sequence.Length, MaxCharactersToAlign);
            int lengthB = Math.Min(sequenceB.Sequence.Length, MaxCharactersToAlign);

            // A banded alignment is given up when the lengths are further apart than the band allows.
            if (banded && Math.Abs(lengthB - lengthA) > Bandwidth)
            {
                result.Update(int.MaxValue, "No Alignment Possible", "No Alignment Possible");
                return result;
            }

            EditDistance editor = new EditDistance(sequenceA.Sequence.Substring(0, lengthA), sequenceB.Sequence.Substring(0, lengthB));
            string[]     alignment;

            if (banded)
            {
                editor.setupBanded();
                alignment = editor.bandedResults();
            }
            else
            {
                editor.setupUnbanded();
                alignment = editor.results();
            }

            // The score is read only after the alignment strings have been requested.
            int score = editor.value();

            result.Update(score, alignment[0], alignment[1]);
            return result;
        }
Esempio n. 15
0
 /// <summary>
 /// Computes the whole table without a band: every cell from [1, 1] up to [rows - 1, cols - 1] is handed
 /// to computeVal and its return value is stored in <paramref name="matrix"/>.
 /// </summary>
 private void unrestricted(int[,] matrix, int[,] prev, int rows, int cols, GeneSequence sequenceA, GeneSequence sequenceB)
 {
     // Rows are processed top to bottom; inside a row the cells are processed left to right.
     int currentRow = 1;
     while (currentRow < rows)
     {
         int currentCol = 1;
         while (currentCol < cols)
         {
             matrix[currentRow, currentCol] = computeVal(matrix, prev, currentRow, currentCol, sequenceA, sequenceB);
             currentCol++;
         }
         currentRow++;
     }
 }
Esempio n. 16
0
        /// <summary>
        /// Computes the alignment score of two sequences with a two-row dynamic-programming table.
        /// A match contributes -3, a substitution 1 and an insertion or deletion 5.
        /// </summary>
        /// <param name="sequenceA">the first sequence</param>
        /// <param name="sequenceB">the second sequence, may have length not equal to the length of the first seq.</param>
        /// <param name="resultTableSoFar">not used by this method</param>
        /// <param name="rowInTable">row of the result table that this alignment occupies</param>
        /// <param name="columnInTable">column of the result table that this alignment occupies</param>
        /// <returns>0 when rowInTable is less than or equal to columnInTable; otherwise the score of aligning
        /// the first MaxCharactersToAlign characters (or fewer, if a sequence is shorter) of both sequences.</returns>
        public int Align(GeneSequence sequenceA, GeneSequence sequenceB, ResultTable resultTableSoFar, int rowInTable, int columnInTable)
        {
            // Cells on or above the diagonal (row <= column) are not computed and report 0.
            // NOTE(review): the previous comment said "only fill in above the diagonal", which is the
            // opposite of what this condition does - confirm which one is intended.
            if (rowInTable <= columnInTable)
            {
                return 0;
            }

            const int indelCost        = 5;
            const int matchCost        = -3;
            const int substitutionCost = 1;

            string a          = sequenceA.Sequence;
            string b          = sequenceB.Sequence;
            int    rowsToFill = Math.Min(a.Length, MaxCharactersToAlign);
            int    colsToFill = Math.Min(b.Length, MaxCharactersToAlign);

            // Only two rows are kept: the row completed last and the row being filled.
            int[] finishedRow = new int[MaxCharactersToAlign + 1];
            int[] workingRow  = new int[MaxCharactersToAlign + 1];

            // First row: reaching column j from the origin takes j indels.
            for (int j = 0; j <= colsToFill; j++)
            {
                finishedRow[j] = indelCost * j;
            }

            for (int i = 1; i <= rowsToFill; i++)
            {
                for (int j = 0; j <= colsToFill; j++)
                {
                    int viaAbove = finishedRow[j] + indelCost;

                    // The first column can only be reached from the cell above.
                    if (j == 0)
                    {
                        workingRow[j] = viaAbove;
                        continue;
                    }

                    // Cheapest of the two indels, then compared with the diagonal match/substitution.
                    int viaIndel = Math.Min(workingRow[j - 1] + indelCost, viaAbove);
                    int stepCost = (a[i - 1] == b[j - 1]) ? matchCost : substitutionCost;
                    workingRow[j] = Math.Min(viaIndel, finishedRow[j - 1] + stepCost);
                }

                // Exchange the roles of the two rows; the old finished row is overwritten next.
                int[] swap = finishedRow;
                finishedRow = workingRow;
                workingRow  = swap;
            }

            // After the exchange the last filled row is the finished one.
            return finishedRow[colsToFill];
        }
Esempio n. 17
0
 /// <summary>
 /// Prepares the shared state for an alignment: stores the characters of both sequences in charA and
 /// charB and sets resultRow to the first table row, in which column k holds k * INDEL.
 /// </summary>
 private void initialize(GeneSequence sequenceA, GeneSequence sequenceB)
 {
     // Both sequences pass through formatSequence with MaxCharactersToAlign before being split into
     // characters (presumably this limits their length - confirm against formatSequence).
     charA = formatSequence(sequenceA.Sequence, MaxCharactersToAlign).ToCharArray();
     charB = formatSequence(sequenceB.Sequence, MaxCharactersToAlign).ToCharArray();

     // One entry per character of B plus the leading empty-prefix column.
     int[] firstRow = new int[charB.Length + 1];
     for (int column = firstRow.Length - 1; column >= 0; column--)
     {
         firstRow[column] = column * INDEL;
     }
     resultRow = firstRow;
 }
Esempio n. 18
0
        /// <summary>
        /// Computes the alignment score of two sequences while keeping only one table row at a time.
        /// </summary>
        /// <param name="sequenceA">Sequence whose characters are consumed one row at a time.</param>
        /// <param name="sequenceB">Sequence whose characters index the columns of each row.</param>
        /// <returns>The last entry of the final row.</returns>
        public int Align(GeneSequence sequenceA, GeneSequence sequenceB)
        {
            // Sets charA, charB and the first row (resultRow).
            initialize(sequenceA, sequenceB);

            // Every character of A replaces the current row with the next one.
            int position = 0;
            while (position < charA.Length)
            {
                resultRow = computeNextRow(resultRow, charA[position], charB);
                position++;
            }

            int lastColumn = resultRow.Length - 1;
            return resultRow[lastColumn];
        }
        //Above are functions dealing with the alignment, below are functions dealing with the extraction

        /// <summary>
        /// Recomputes the full cost table for two sequences and traces it back from the bottom-right cell to
        /// produce both aligned strings.
        /// </summary>
        /// <param name="sequenceA">Sequence whose characters index the rows of the table.</param>
        /// <param name="sequenceB">Sequence whose characters index the columns of the table.</param>
        /// <returns>Two strings of equal length: [0] the aligned A and [1] the aligned B, with '-' marking gaps.</returns>
        /// <exception cref="ArgumentException">A cell on the trace is not explained by any of its neighbours.</exception>
        public String[] extractSequences(GeneSequence sequenceA, GeneSequence sequenceB)
        {
            // set up backtrace
            initializeSequencing(sequenceA, sequenceB);

            // initialize table to store each row
            List <int[]> resultTable = new List <int[]>(charA.Length + 1);

            resultTable.Add(resultRow);

            // calculate individual table, one row per character of A
            for (int i = 0; i < charA.Length; i++)
            {
                resultTable.Add(createNextRow(resultTable[i], charA[i], charB));
            }

            // initialize stringholders
            StringBuilder one = new StringBuilder();
            StringBuilder two = new StringBuilder();
            int           row = charA.Length;
            int           col = charB.Length;

            // backtrace strings
            // creates the string in reverse order as it traverses from the end to the beginning only going through those on the final path
            while (row != 0 || col != 0)
            {
                int current = resultTable[row][col];

                // Moving left is only possible while a column remains; without this check the trace would
                // read index -1 once it reaches column 0 before row 0.
                if (col > 0 && current == resultTable[row][col - 1] + INDEL)
                {
                    one.Append('-');
                    two.Append(charB[--col]);
                }
                // Moving up is only possible while a row remains.
                else if (row > 0 && current == resultTable[row - 1][col] + INDEL)
                {
                    one.Append(charA[--row]);
                    two.Append('-');
                }
                else if (row > 0 && col > 0 &&
                         (current == resultTable[row - 1][col - 1] + MATCH ||
                          current == resultTable[row - 1][col - 1] + SUB))
                {
                    one.Append(charA[--row]);
                    two.Append(charB[--col]);
                }
                else
                {
                    throw new ArgumentException();
                }
            }

            String[] results = new String[2];
            results[0] = reverseString(one.ToString());
            results[1] = reverseString(two.ToString());
            return(results);
        }
Esempio n. 20
0
        /// <summary>
        /// Builds the complete cost table for two sequences, then walks it backwards from the bottom-right
        /// cell to recover the alignment.
        /// </summary>
        /// <param name="sequenceA">Sequence whose characters index the rows of the table.</param>
        /// <param name="sequenceB">Sequence whose characters index the columns of the table.</param>
        /// <returns>
        /// Three strings of equal length: [0] the aligned A with '-' for gaps, [1] the aligned B with '-' for
        /// gaps, [2] one marker per position (MATCH_CHAR, SUB_CHAR or INDEL_CHAR).
        /// </returns>
        /// <exception cref="ArgumentException">A cell on the walk is not explained by any of its neighbours.</exception>
        public String[] extractSolution(GeneSequence sequenceA, GeneSequence sequenceB)
        {
            // initialize arrays and strings and first row
            initialize(sequenceA, sequenceB);

            // initialize table to store each row
            List<int[]> resultTable = new List<int[]>(charA.Length + 1);
            resultTable.Add(resultRow);

            // calculate each additional row (and save it for backtrace)
            for (int i = 0; i < charA.Length; i++)
            {
                resultTable.Add(computeNextRow(resultTable[i], charA[i], charB));
            }

            // compute back trace and generate final strings (collected in reverse, flipped at the end)
            StringBuilder buildA = new StringBuilder();
            StringBuilder buildB = new StringBuilder();
            StringBuilder buildC = new StringBuilder();
            int row = charA.Length;
            int col = charB.Length;

            // while index pointers to string a (row) and string b (column) aren't at zero figure out last operation
            while (row != 0 || col != 0)
            {
                // In column 0 there is nothing to the left and in row 0 nothing above; the flags keep the
                // comparisons below from reading index -1.
                bool hasLeft = col > 0;
                bool hasAbove = row > 0;

                if (hasLeft && resultTable[row][col] == resultTable[row][col - 1] + INDEL)
                {
                    buildA.Append('-');
                    buildB.Append(charB[--col]);
                    buildC.Append(INDEL_CHAR);
                }
                else if (hasAbove && resultTable[row][col] == resultTable[row - 1][col] + INDEL)
                {
                    buildA.Append(charA[--row]);
                    buildB.Append('-');
                    buildC.Append(INDEL_CHAR);
                }
                else if (hasAbove && hasLeft &&
                    (resultTable[row][col] == resultTable[row - 1][col - 1] + MATCH ||
                     resultTable[row][col] == resultTable[row - 1][col - 1] + SUB))
                {
                    buildA.Append(charA[--row]);
                    buildB.Append(charB[--col]);
                    // row and col now index the two characters that were just consumed.
                    buildC.Append(charB[col] == charA[row] ? MATCH_CHAR : SUB_CHAR);
                }
                else
                {
                    throw new ArgumentException();
                }
            }

            String[] results = new String[3];
            results[0] = reverseString(buildA.ToString());
            results[1] = reverseString(buildB.ToString());
            results[2] = reverseString(buildC.ToString());
            return results;
        }
        // Prepares the working state for one alignment run: the two character arrays
        // and the base-case cost row.
        private void initializeSequencing(GeneSequence sequenceA, GeneSequence sequenceB)
        {
            // Both sequences go through stringLimit with MaxCharactersToAlign before being
            // turned into character arrays (presumably a truncation - confirm in stringLimit).
            charA = stringLimit(sequenceA.Sequence, MaxCharactersToAlign).ToCharArray();
            charB = stringLimit(sequenceB.Sequence, MaxCharactersToAlign).ToCharArray();

            // One entry per character of B plus one for the empty prefix.
            resultRow = new int[charB.Length + 1];

            // Entry k of the base row costs k INDELs.
            int column = 0;
            while (column < resultRow.Length)
            {
                resultRow[column] = column * INDEL;
                column++;
            }
        }
Esempio n. 22
0
        /////////////////////////////////////////////////////////////////////////////////////////////////////////////////
        /////////////////////////////////////////////////////////////////////////////////////////////////////////////////
        /////////////////////////////////////////////////////////////////////////////////////////////////////////////////

        /////////////////////////////////////////////////////////////////////////////////////////////////////////////////
        ///////////////////////////////////////// Unrestricted Algorithm ////////////////////////////////////////////////
        /////////////////////////////////////////////////////////////////////////////////////////////////////////////////

        /**
         * Runs the unrestricted (full-table) dynamic-programming alignment of the two sequences.
         * Every cell of an (n + 1) x (m + 1) table is filled, where n and m are the sequence lengths
         * after capping them at MaxCharactersToAlign.
         * Time Complexity: O(nm) - the nested loops visit each cell of the table once.
         * Space Complexity: O(nm) - a value table and a direction table of that size are allocated.
         * The final score is written to `score`; the alignment strings are produced by createAlignments.
         */
        void unrestrictedAlgorithm(ref int score, ref string[] alignment, ref GeneSequence sequenceA, ref GeneSequence sequenceB)
        {
            // Cap both lengths at the maximum number of characters to align
            int rowsToAlign = Math.Min(sequenceA.Sequence.Length, MaxCharactersToAlign);
            int colsToAlign = Math.Min(sequenceB.Sequence.Length, MaxCharactersToAlign);

            // One table for the running costs, one for the direction each cell was reached from
            int[,] values      = new int[rowsToAlign + 1, colsToAlign + 1];
            directions[,] prev = new directions[rowsToAlign + 1, colsToAlign + 1];

            // Base cases (first row and first column) are delegated to fillStartCells
            fillStartCells(ref values, ref prev, rowsToAlign, colsToAlign, false);

            for (int r = 1; r <= rowsToAlign; r++)
            {
                for (int c = 1; c <= colsToAlign; c++)
                {
                    // Diagonal step: -3 when the letters match, +1 when they differ
                    int diagonalStep;
                    if (sequenceA.Sequence[r - 1] == sequenceB.Sequence[c - 1])
                    {
                        diagonalStep = -3;
                    }
                    else
                    {
                        diagonalStep = 1;
                    }

                    int fromDiagonal = values[r - 1, c - 1] + diagonalStep;
                    int fromLeft     = values[r, c - 1] + 5;   // insert
                    int fromTop      = values[r - 1, c] + 5;   // delete

                    // Pick the cheapest move. Ties prefer the diagonal, then left, then top,
                    // because a later candidate only wins when it is strictly cheaper.
                    int cheapest      = fromDiagonal;
                    directions chosen = directions.DIAGONAL;
                    if (fromLeft < cheapest)
                    {
                        cheapest = fromLeft;
                        chosen   = directions.LEFT;
                    }
                    if (fromTop < cheapest)
                    {
                        cheapest = fromTop;
                        chosen   = directions.TOP;
                    }

                    values[r, c] = cheapest;
                    prev[r, c]   = chosen;
                }
            }

            // The bottom-right cell holds the score of the whole alignment
            score = values[rowsToAlign, colsToAlign];

            // Build the alignment strings from the recorded directions
            createAlignments(ref alignment, ref prev, ref sequenceA, ref sequenceB, ref rowsToAlign, ref colsToAlign);
        }
Esempio n. 23
0
        /// <summary>
        /// Computes the alignment score of two sequences while keeping only one cost row at a time.
        /// </summary>
        /// <returns>the last entry of the final cost row</returns>
        public int Align(GeneSequence sequenceA, GeneSequence sequenceB)
        {
            // set up the character arrays and the base-case row
            initialize(sequenceA, sequenceB);

            // fold every character of A into the row, replacing the row each time
            int position = 0;
            while (position < charA.Length)
            {
                resultRow = computeNextRow(resultRow, charA[position], charB);
                position++;
            }

            // the score sits in the right-most cell
            int lastColumn = resultRow.Length - 1;
            return resultRow[lastColumn];
        }
        /// <summary>
        /// Stores the two sequences, allocates both row buffers and seeds the first row with
        /// insert/delete costs.
        /// </summary>
        public void initialize(GeneSequence aSequence, GeneSequence bSequence)
        {
            X = aSequence.Sequence;
            Y = bSequence.Sequence;

            // width is read below; presumably setDimensions derives it from X and Y - confirm.
            setDimensions();

            prev    = new double[width];
            results = new double[width];

            // Column 0 keeps its default of zero; column k costs k insert/deletes.
            int column = 1;
            while (column < width)
            {
                results[column] = InsertDeleteCost * column;
                ++column;
            }

            // Move on to the next row and exchange the buffers.
            currentRow += 1;
            SwapArrays();
        }
Esempio n. 25
0
        /// <summary>
        /// Creates the two-row table for the given sequences and seeds the first row with
        /// insert/delete costs through SetCell.
        /// </summary>
        public dpRows(GeneSequence aSequence, GeneSequence bSequence)
        {
            X = aSequence.Sequence;
            Y = bSequence.Sequence;

            // width is read below; presumably setDimensions derives it from X and Y - confirm.
            setDimensions();

            prev    = new double[width];
            results = new double[width];

            // Cell 0 is left untouched; cell k is given the cost of k insert/deletes.
            int column = 1;
            while (column < width)
            {
                this.SetCell(column, InsertDeleteCost * column);
                ++column;
            }

            // Move on to the next row and exchange the buffers.
            currentRow += 1;
            this.SwapArrays();
        }
Esempio n. 26
0
        /// <summary>
        /// Aligns two sequences with either the unrestricted or the banded algorithm and returns
        /// the score together with both alignment strings.
        /// </summary>
        /// <param name="sequenceA">the first sequence</param>
        /// <param name="sequenceB">the second sequence; its length may differ from the first</param>
        /// <param name="banded">true if alignment should be band limited.</param>
        /// <returns>
        /// a Result holding the score and the two alignment strings, each cut to at most 100 characters
        /// </returns>
        public ResultTable.Result Align_And_Extract(GeneSequence sequenceA, GeneSequence sequenceB, bool banded)
        {
            ResultTable.Result result = new ResultTable.Result();

            string[] alignment = new string[2];
            alignment[0] = "";
            alignment[1] = "";

            // Upper bound on each matrix dimension.
            // NOTE(review): the banded cap is 15001 (15000 + 1) while the unrestricted cap is
            // MaxCharactersToAlign without the "+ 1" - confirm this asymmetry is intended.
            int dimensionCap;
            if (banded)
            {
                dimensionCap = 15001;
            }
            else
            {
                dimensionCap = MaxCharactersToAlign;
            }

            // One extra row/column for the empty prefix, clipped to the cap.
            int rows = sequenceA.Sequence.Length + 1;
            if (dimensionCap < rows)
            {
                rows = dimensionCap;
            }
            int cols = sequenceB.Sequence.Length + 1;
            if (dimensionCap < cols)
            {
                cols = dimensionCap;
            }

            // Cost matrix plus the matrix that records the path.
            int[,] matrix = new int[rows, cols];
            int[,] prev   = new int[rows, cols];
            initializeMatrices(matrix, prev, rows, cols);

            if (banded)
            {
                bandedAlg(matrix, prev, rows, cols, sequenceA, sequenceB);
            }
            else
            {
                unrestricted(matrix, prev, rows, cols, sequenceA, sequenceB);
            }

            // The bottom-right cell carries the score.
            int score = matrix[rows - 1, cols - 1];

            // Rebuild the alignment strings from the recorded path.
            findAlignments(alignment, prev, rows, cols, score, sequenceA.Sequence, sequenceB.Sequence);

            // Only the first 100 characters of each string are kept for display.
            for (int k = 0; k < 2; k++)
            {
                if (alignment[k].Length > 100)
                {
                    alignment[k] = alignment[k].Substring(0, 100);
                }
            }

            result.Update(score, alignment[0], alignment[1]);
            return result;
        }
Esempio n. 27
0
        /// <summary>
        /// Stores the two sequences, sizes the cost table and initializes the cost and
        /// previous-cell matrices.
        /// </summary>
        /// <param name="sequenceA">the "top" word; it determines the number of columns</param>
        /// <param name="sequenceB">the "side" word; it determines the number of rows</param>
        /// <param name="maxSize">maximum number of characters of each sequence to use</param>
        public GenomeSequencer(GeneSequence sequenceA, GeneSequence sequenceB, int maxSize)
        {
            //sequence A will be the "top" word, so it will be the columns
            //sequence B will be the "side" word, so it will be the rows
            this.sequenceA = sequenceA;
            this.sequenceB = sequenceB;

            //add one to each dimension for the base case row and column
            rowSize = Math.Min(maxSize, sequenceB.Sequence.Length) + 1;
            colSize = Math.Min(maxSize, sequenceA.Sequence.Length) + 1;

            //set up the matrices used when calculating the cost of the sequences
            initializeAlignmentCost();
            initializePreviousMatrix();
        }
Esempio n. 28
0
        /// <summary>
        /// Shows the extracted alignment for the clicked result cell in the output console.
        /// The column index selects sequence A and the row index selects sequence B.
        /// </summary>
        private void dataGridViewResults_CellClick(object sender, DataGridViewCellEventArgs e)
        {
            // Clicks on a row or column header are reported with an index of -1;
            // there is no sequence pair behind them, so there is nothing to show.
            if (e.RowIndex < 0 || e.ColumnIndex < 0)
            {
                return;
            }

            GeneSequence sequenceA = this.m_sequences[e.ColumnIndex];
            GeneSequence sequenceB = this.m_sequences[e.RowIndex];

            String[] results = processor.extractSequences(sequenceA, sequenceB);

            // The cell position is displayed 1-based; each sequence goes through stringLimit with 100.
            String outputText = "Output Console:";

            outputText += "\r\nCell (";
            outputText += (e.RowIndex + 1) + ", ";
            outputText += (e.ColumnIndex + 1) + ")";
            outputText += "\r\nSequence A: " + processor.stringLimit(results[0], 100);
            outputText += "\r\nSequence B: " + processor.stringLimit(results[1], 100);

            outputConsole.Text = outputText;
        }
Esempio n. 29
0
        /// <summary>
        /// Creates the full table for the two sequences and fills in the base cases:
        /// the origin, the first row and the first column.
        /// </summary>
        public dpTable(GeneSequence aSequence, GeneSequence bSequence)
        {
            X = aSequence.Sequence;
            Y = bSequence.Sequence;

            // width and height are read below; presumably set by setDimensions - confirm.
            setDimensions();

            results = new node[width, height];

            // Origin: zero cost, tagged "start".
            this.SetCell(0, 0, 0, "start");

            // Cells (x, 0): x indels, tagged "left".
            int x = 1;
            while (x < width)
            {
                this.SetCell(x, 0, indel * x, "left");
                ++x;
            }

            // Cells (0, y): y indels, tagged "top".
            int y = 1;
            while (y < height)
            {
                this.SetCell(0, y, indel * y, "top");
                ++y;
            }
        }
Esempio n. 30
0
 /// <summary>
 /// Stores the inputs and allocates the direction and distance tables. Each table dimension is
 /// size + 1, unless that exceeds the sequence length, in which case it is the length + 1.
 /// </summary>
 public Algo(GeneSequence sequenceA, GeneSequence sequenceB, bool banded, int size)
 {
     seqA        = sequenceA;
     seqB        = sequenceB;
     this.banded = banded;

     // Requested dimension including the base-case row/column.
     int requested = size + 1;

     int lengthA = sequenceA.Sequence.Length;
     this.sizeRow = requested > lengthA ? lengthA + 1 : requested;

     int lengthB = sequenceB.Sequence.Length;
     this.sizeCol = requested > lengthB ? lengthB + 1 : requested;

     prev = new char[this.sizeRow, this.sizeCol];
     dis  = new int[this.sizeRow, this.sizeCol];
 }
Esempio n. 31
0
        /// <summary>
        /// Computes the alignment score of two sequences with a GenomeSequencer. The alignment
        /// strings are not produced here; both are reported as empty strings.
        /// </summary>
        /// <param name="sequenceA">the first sequence</param>
        /// <param name="sequenceB">the second sequence; its length may differ from the first</param>
        /// <param name="banded">true if alignment should be band limited.</param>
        /// <returns>a Result holding the score and two empty alignment strings</returns>
        public ResultTable.Result Align_And_Extract(GeneSequence sequenceA, GeneSequence sequenceB, bool banded)
        {
            ResultTable.Result result = new ResultTable.Result();

            // The sequencer is limited to MaxCharactersToAlign characters per sequence.
            GenomeSequencer sequencer = new GenomeSequencer(sequenceA, sequenceB, MaxCharactersToAlign);
            int score = sequencer.calculateSequenceCost(banded);

            // No alignment extraction yet: both strings stay empty.
            string alignedA = "";
            string alignedB = "";

            result.Update(score, alignedA, alignedB);
            return result;
        }
        //Dictionary<string, int> previouslyCalculatedValues = new Dictionary<string, int>();
        /// <summary>
        /// Computes the alignment score of two sequences, keeping only one cost row at a time.
        /// </summary>
        /// <param name="sequenceA">the first sequence</param>
        /// <param name="sequenceB">the second sequence, may have length not equal to the length of the first seq.</param>
        /// <param name="resultTableSoFar">table of results generated so far; not read by this method</param>
        /// <param name="rowInTable">row of the result table this alignment belongs to; not read by this method</param>
        /// <param name="columnInTable">column of the result table this alignment belongs to; not read by this method</param>
        /// <returns>the alignment score for sequenceA and sequenceB (last entry of the final cost row)</returns>
        public int Align(GeneSequence sequenceA, GeneSequence sequenceB, ResultTable resultTableSoFar, int rowInTable, int columnInTable)
        {
            // A dictionary cache keyed on the concatenated sequences was prototyped here and is
            // currently disabled; every call recomputes the score.

            // set up the character arrays and the base-case row
            initializeSequencing(sequenceA, sequenceB);

            // One pass per character of the first sequence; each pass replaces the single row that
            // is kept, so memory stays at one row regardless of how many passes run.
            int position = 0;
            while (position < charA.Length)
            {
                resultRow = createNextRow(resultRow, charA[position], charB);
                position++;
            }

            // the score is the right-most cell of the final row
            int lastColumn = resultRow.Length - 1;
            return resultRow[lastColumn];
        }
Esempio n. 33
0
        /// <summary>
        /// Aligns the leading MaxCharactersToAlign characters of two sequences with a
        /// DynamicProgramming instance and returns its score and alignment strings.
        /// </summary>
        /// <param name="cell">passed through to DynamicProgramming unchanged</param>
        /// <param name="sequenceA">the first sequence</param>
        /// <param name="sequenceB">the second sequence; its length may differ from the first</param>
        /// <param name="banded">true if alignment should be band limited.</param>
        /// <returns>a Result holding the score and the two alignment strings</returns>
        public ResultTable.Result Align_And_Extract(Tuple<int,int> cell, GeneSequence sequenceA, GeneSequence sequenceB, bool banded)
        {
            ResultTable.Result result = new ResultTable.Result();

            // Number of characters to keep from A: its length, capped at MaxCharactersToAlign.
            int keepA = sequenceA.Sequence.Length;
            if (keepA >= MaxCharactersToAlign)
            {
                keepA = MaxCharactersToAlign;
            }
            string a = sequenceA.Sequence.Substring(0, keepA);

            // Same for B.
            int keepB = sequenceB.Sequence.Length;
            if (keepB >= MaxCharactersToAlign)
            {
                keepB = MaxCharactersToAlign;
            }
            string b = sequenceB.Sequence.Substring(0, keepB);

            DynamicProgramming dp = new DynamicProgramming(cell, a, b, banded);

            int score = dp.getScore();
            string alignedA = dp.getResultA();
            string alignedB = dp.getResultB();

            result.Update(score, alignedA, alignedB);
            return result;
        }
Esempio n. 34
0
        /// <summary>
        /// Aligns two sequences with either the unrestricted or the banded algorithm and returns
        /// the score together with both alignment strings.
        /// </summary>
        /// <param name="sequenceA">the first sequence</param>
        /// <param name="sequenceB">the second sequence; its length may differ from the first</param>
        /// <param name="banded">true if alignment should be band limited.</param>
        /// <returns>
        /// a Result holding the score and the two alignment strings, each cut to at most 100 characters
        /// </returns>
        public ResultTable.Result Align_And_Extract(GeneSequence sequenceA, GeneSequence sequenceB, bool banded)
        {
            ResultTable.Result result = new ResultTable.Result();

            string[] alignment = new string[2];
            alignment[0] = "";
            alignment[1] = "";

            // Upper bound on each matrix dimension: 15001 when banded, MaxCharactersToAlign otherwise.
            int dimensionCap;
            if (banded)
            {
                dimensionCap = 15001;
            }
            else
            {
                dimensionCap = MaxCharactersToAlign;
            }

            // Sequence length plus the base-case row/column, clipped to the cap.
            int rows = sequenceA.Sequence.Length + 1;
            if (dimensionCap < rows)
            {
                rows = dimensionCap;
            }
            int cols = sequenceB.Sequence.Length + 1;
            if (dimensionCap < cols)
            {
                cols = dimensionCap;
            }

            // Cost matrix and path matrix.
            int[,] matrix = new int[rows, cols];
            int[,] prev   = new int[rows, cols];
            initializeMatrices(matrix, prev, rows, cols);

            if (banded)
            {
                bandedAlg(matrix, prev, rows, cols, sequenceA, sequenceB);
            }
            else
            {
                unrestricted(matrix, prev, rows, cols, sequenceA, sequenceB);
            }

            // Score is read from the bottom-right cell, then the path is turned into strings.
            int score = matrix[rows - 1, cols - 1];
            findAlignments(alignment, prev, rows, cols, score, sequenceA.Sequence, sequenceB.Sequence);

            // Keep at most 100 characters of each alignment string.
            for (int k = 0; k < 2; k++)
            {
                if (alignment[k].Length > 100)
                {
                    alignment[k] = alignment[k].Substring(0, 100);
                }
            }

            result.Update(score, alignment[0], alignment[1]);
            return result;
        }
Esempio n. 35
0
        /// <summary>
        /// Aligns two sequences with an Algo instance and returns its score and alignment strings.
        /// </summary>
        /// <param name="sequenceA">the first sequence</param>
        /// <param name="sequenceB">the second sequence; its length may differ from the first</param>
        /// <param name="banded">
        /// true if alignment should be band limited.
        /// NOTE(review): this value is not forwarded; Algo is always constructed with false - confirm intent.
        /// </param>
        /// <returns>a Result holding the score and the two alignment strings</returns>
        public ResultTable.Result Align_And_Extract(GeneSequence sequenceA, GeneSequence sequenceB, bool banded)
        {
            ResultTable.Result result = new ResultTable.Result();

            // Unbanded run with a fixed size of 5000.
            Algo solver = new Algo(sequenceA, sequenceB, false, 5000);

            // Fill the tables first, then derive the strings from them.
            solver.RunAlgo();
            solver.CalcStrings();

            int score = solver.GetScore();
            string rowAlignment = solver.GetRowString();
            string colAlignment = solver.GetColString();

            result.Update(score, rowAlignment, colAlignment);
            return result;
        }
Esempio n. 36
0
        /// <summary>
        /// Aligns the leading MaxCharactersToAlign characters of two sequences with a
        /// DynamicProgramming instance and returns its score and alignment strings.
        /// </summary>
        /// <param name="cell">passed through to DynamicProgramming unchanged</param>
        /// <param name="sequenceA">the first sequence</param>
        /// <param name="sequenceB">the second sequence; its length may differ from the first</param>
        /// <param name="banded">true if alignment should be band limited.</param>
        /// <returns>a Result holding the score and the two alignment strings</returns>
        public ResultTable.Result Align_And_Extract(Tuple <int, int> cell, GeneSequence sequenceA, GeneSequence sequenceB, bool banded)
        {
            ResultTable.Result result = new ResultTable.Result();

            // Number of characters to keep from A: its length, capped at MaxCharactersToAlign.
            int keepA = sequenceA.Sequence.Length;
            if (keepA >= MaxCharactersToAlign)
            {
                keepA = MaxCharactersToAlign;
            }
            string a = sequenceA.Sequence.Substring(0, keepA);

            // Same for B.
            int keepB = sequenceB.Sequence.Length;
            if (keepB >= MaxCharactersToAlign)
            {
                keepB = MaxCharactersToAlign;
            }
            string b = sequenceB.Sequence.Substring(0, keepB);

            DynamicProgramming dp = new DynamicProgramming(cell, a, b, banded);

            int score = dp.getScore();
            string alignedA = dp.getResultA();
            string alignedB = dp.getResultB();

            result.Update(score, alignedA, alignedB);
            return result;
        }
Esempio n. 37
0
        /// <summary>
        /// Computes the cost of cell [row, col] from its diagonal, upper and left neighbours and
        /// records in <paramref name="prev"/> which neighbour supplied it.
        /// </summary>
        /// <returns>the cost to store at matrix[row, col]</returns>
        private int computeVal(int[,] matrix, int[,] prev, int row, int col, GeneSequence sequenceA, GeneSequence sequenceB)
        {
            const int gapPenalty = 5;

            // Diagonal step: -3 when the two letters agree, +1 when they differ.
            bool lettersAgree = sequenceA.Sequence[row - 1] == sequenceB.Sequence[col - 1];

            int viaDiagonal = matrix[row - 1, col - 1] + (lettersAgree ? -3 : 1);
            int viaAbove    = matrix[row - 1, col] + gapPenalty;
            int viaLeft     = matrix[row, col - 1] + gapPenalty;

            // Ties favour the diagonal first, then the cell above, then the cell to the left.
            if (viaDiagonal <= viaAbove && viaDiagonal <= viaLeft)
            {
                prev[row, col] = DIAG;
                return viaDiagonal;
            }

            if (viaAbove < viaDiagonal && viaAbove <= viaLeft)
            {
                prev[row, col] = UP;
                return viaAbove;
            }

            prev[row, col] = LEFT;
            return viaLeft;
        }
Esempio n. 38
0
        /// <summary>
        /// Aligns two sequences with a MyAligner instance and returns its cost and aligned strings.
        /// </summary>
        /// <param name="sequenceA">the first sequence</param>
        /// <param name="sequenceB">the second sequence; its length may differ from the first</param>
        /// <param name="banded">true if alignment should be band limited.</param>
        /// <returns>a Result holding the score and the two alignment strings</returns>
        public ResultTable.Result Align_And_Extract(GeneSequence sequenceA, GeneSequence sequenceB, bool banded)
        {
            ResultTable.Result result = new ResultTable.Result();

            // The aligner receives the raw strings, the banded flag and the character limit.
            MyAligner aligner = new MyAligner(sequenceA.Sequence, sequenceB.Sequence, banded, MaxCharactersToAlign);
            aligner.ExecuteAlignment();

            int score = aligner.GetCost();
            string alignedA = aligner.GetAlignedSequenceA();
            string alignedB = aligner.GetAlignedSequenceB();

            result.Update(score, alignedA, alignedB);
            return result;
        }
Esempio n. 39
0
        /// <summary>
        /// Reads gene sequences from the DNA table through m_accessConn.
        /// </summary>
        /// <param name="max">upper bound on the number of sequences to read</param>
        /// <returns>
        /// an array whose length is the smaller of <paramref name="max"/> and MAX(ID) of the table;
        /// an empty array when the table has no rows. Entries for which the query returned no row
        /// are left null.
        /// </returns>
        public GeneSequence[] ReadGeneSequences(int max)
        {
            GeneSequence[] result;

            try
            {
                m_accessConn.Open();

                // MAX(ID) is used as the number of sequences.
                // NOTE(review): this over-counts when IDs have gaps - confirm IDs are contiguous.
                object highestId;
                using (OleDbCommand countSequencesCommand = new OleDbCommand("SELECT MAX(ID) FROM DNA", m_accessConn))
                {
                    highestId = countSequencesCommand.ExecuteScalar();
                }

                // An aggregate over an empty table yields DBNull, which cannot be cast to int.
                if (highestId == null || highestId is System.DBNull)
                {
                    return new GeneSequence[0];
                }

                int sequenceCount = (int)highestId;
                result = new GeneSequence[sequenceCount < max ? sequenceCount : max];

                // TOP keeps the query from returning more rows than the array can hold.
                using (OleDbCommand selectCommand = new OleDbCommand("SELECT TOP " + result.Length + " * FROM DNA ", m_accessConn))
                using (OleDbDataReader reader = selectCommand.ExecuteReader())
                {
                    for (int i = 0; reader.Read() && i < result.Length; ++i)
                    {
                        // Columns 1 and 2 are passed to the GeneSequence constructor
                        // (presumably name and sequence - confirm against the table layout).
                        result[i] = new GeneSequence(reader.GetString(1), reader.GetString(2));
                    }
                }
            }
            finally
            {
                // Runs on every path, including the early return above.
                m_accessConn.Close();
            }

            return(result);
        }
Esempio n. 40
0
        /// <summary>
        /// Reads gene sequences from the DNA table through m_accessConn.
        /// </summary>
        /// <param name="max">upper bound on the number of sequences to read</param>
        /// <returns>
        /// an array whose length is the smaller of <paramref name="max"/> and MAX(ID) of the table;
        /// an empty array when the table has no rows. Entries for which the query returned no row
        /// are left null.
        /// </returns>
        public GeneSequence[] ReadGeneSequences(int max)
        {
            GeneSequence[] result;

            try
            {
                m_accessConn.Open();

                // MAX(ID) is used as the number of sequences.
                // NOTE(review): this over-counts when IDs have gaps - confirm IDs are contiguous.
                object highestId;
                using (OleDbCommand countSequencesCommand = new OleDbCommand("SELECT MAX(ID) FROM DNA", m_accessConn))
                {
                    highestId = countSequencesCommand.ExecuteScalar();
                }

                // An aggregate over an empty table yields DBNull, which cannot be cast to int.
                if (highestId == null || highestId is System.DBNull)
                {
                    return new GeneSequence[0];
                }

                int sequenceCount = (int)highestId;
                result = new GeneSequence[sequenceCount < max ? sequenceCount : max];

                // TOP keeps the query from returning more rows than the array can hold.
                using (OleDbCommand selectCommand = new OleDbCommand("SELECT TOP " + result.Length + " * FROM DNA ", m_accessConn))
                using (OleDbDataReader reader = selectCommand.ExecuteReader())
                {
                    for (int i = 0; reader.Read() && i < result.Length; ++i)
                    {
                        // Columns 1 and 2 are passed to the GeneSequence constructor
                        // (presumably name and sequence - confirm against the table layout).
                        result[i] = new GeneSequence(reader.GetString(1), reader.GetString(2));
                    }
                }
            }
            finally
            {
                // Runs on every path, including the early return above.
                m_accessConn.Close();
            }

            return result;
        }
Esempio n. 41
0
 /// <summary>
 /// Inserts one gene sequence (name and sequence text) into the DNA table through m_accessConn.
 /// Database errors are not caught here; they propagate to the caller after the connection
 /// has been closed.
 /// </summary>
 /// <param name="geneSequence">the sequence to store</param>
 public void WriteGeneSequence(GeneSequence geneSequence)
 {
     // because the sequence can be so long, we need to use parameters
     const string insertCommandString = "INSERT INTO DNA (Name, Sequence) VALUES (?, ?)";

     try
     {
         using (OleDbCommand insertCommand = new OleDbCommand(insertCommandString, m_accessConn))
         {
             // The '?' placeholders are positional, so the parameters must be added in the
             // same order as the columns in the statement.
             insertCommand.Parameters.Add(new OleDbParameter("name", geneSequence.Name));
             insertCommand.Parameters.Add(new OleDbParameter("sequence", geneSequence.Sequence));

             m_accessConn.Open();
             insertCommand.ExecuteNonQuery();
         }
     }
     finally
     {
         m_accessConn.Close();
     }
 }
Esempio n. 42
0
        /// <summary>
        /// Loads NUMBER_OF_SEQUENCES gene sequences from a text file with one "first#second" record
        /// per line. The two fields are passed to the GeneSequence constructor in that order.
        /// </summary>
        /// <param name="fileName">path of the file to read</param>
        /// <returns>
        /// the parsed sequences, or null (after printing "Error Parsing File...") when the file
        /// cannot be read, has too few records, or contains a record without a '#' separator
        /// </returns>
        private GeneSequence[] loadFile(string fileName)
        {
            string input;

            // A failure to open the file still propagates to the caller, as before.
            using (StreamReader reader = new StreamReader(fileName))
            {
                try
                {
                    input = reader.ReadToEnd();
                }
                catch (Exception)
                {
                    // Best effort, as before: report and let the caller deal with null.
                    Console.WriteLine("Error Parsing File...");
                    return null;
                }
            }

            // Accept Windows (\r\n), Unix (\n) and old Mac (\r) line endings and skip blank lines.
            // Splitting on '\r' alone treated an LF-only file as a single line.
            string[] inputLines = input.Split(new string[] { "\r\n", "\n", "\r" }, StringSplitOptions.RemoveEmptyEntries);

            if (inputLines.Length < NUMBER_OF_SEQUENCES)
            {
                Console.WriteLine("Error Parsing File...");
                return null;
            }

            GeneSequence[] temp = new GeneSequence[NUMBER_OF_SEQUENCES];

            for (int i = 0; i < NUMBER_OF_SEQUENCES; i++)
            {
                string[] line = inputLines[i].Split('#');

                // A record needs both fields; anything shorter is malformed.
                if (line.Length < 2)
                {
                    Console.WriteLine("Error Parsing File...");
                    return null;
                }

                temp[i] = new GeneSequence(line[0], line[1]);
            }
            return temp;
        }
Esempio n. 43
0
        /////////////////////////////////////////////////////////////////////////////////////////////////////////////////
        /////////////////////////////////////////////////////////////////////////////////////////////////////////////////
        /////////////////////////////////////////////////////////////////////////////////////////////////////////////////

        /////////////////////////////////////////////////////////////////////////////////////////////////////////////////
        ///////////////////////////////////////// Banded Algorithm //////////////////////////////////////////////////////
        /////////////////////////////////////////////////////////////////////////////////////////////////////////////////
        /**
        * This function performs the banded algorithm on the two sequences using dynamic programming to come up with
        * the best alignment for both. The band half-width is the distance d; currently d = 3, which makes the
        * bandwidth equal to 2d+1 = 7.
        * Time Complexity: O(n+m) where n is the length of the first sequence and m is the length of the second sequence. This
        *                   is because the algorithm iterates over a specific number of cells for each row and column. As we don't
        *                   care about constants, the time would depend on the length of sequence A and B. Meaning each time
        *                   the array size is increased by a row or a column, we have to compute those bandwidth number of cells
        *                   again, so it is O(n+m).
        * Space Complexity: O(nm) where n is the length of the first sequence and m is the length of the second sequence. This
        *                   is because the algorithm creates an array of n x m 
        */
        void bandedAlgorithm(ref int score, ref string[] alignment, ref GeneSequence sequenceA, ref GeneSequence sequenceB)
        {
            // Only the first MaxCharactersToAlign characters of each sequence are aligned.
            int lenA = Math.Min(sequenceA.Sequence.Length, MaxCharactersToAlign);
            int lenB = Math.Min(sequenceB.Sequence.Length, MaxCharactersToAlign);

            // cost[r, c] holds the best score for aligning the first r letters of A with the
            // first c letters of B; cameFrom[r, c] records which neighbour produced it.
            int[,] cost = new int[lenA + 1, lenB + 1];
            directions[,] cameFrom = new directions[lenA + 1, lenB + 1];

            // Row 0 and column 0 are prepared by the helper (called with its banded flag set to true).
            fillStartCells(ref cost, ref cameFrom, lenA, lenB, true);

            bool reachedLastCell = false;
            int firstColumn = 1;   // leftmost column visited in the current row

            for (int r = 1; r <= lenA; r++)
            {
                // Walk the row from the left edge of the band up to column r + distance
                // (or the last column, whichever comes first).
                for (int c = firstColumn; c <= lenB && c <= (distance + r); c++)
                {
                    // Gap from above costs 5; on the upper band edge the cell above lies
                    // outside the band, so that move is ruled out.
                    int fromTop = cost[r - 1, c] + 5;
                    if ((distance + r) == c)
                    {
                        fromTop = int.MaxValue;
                    }

                    // Gap from the left costs 5; on the lower band edge the cell to the left
                    // lies outside the band, so that move is ruled out.
                    int fromLeft = cost[r, c - 1] + 5;
                    if ((distance + c) == r)
                    {
                        fromLeft = int.MaxValue;
                    }

                    // Diagonal move: -3 when the two letters match, +1 when they differ.
                    int step;
                    if (sequenceA.Sequence[r - 1] == sequenceB.Sequence[c - 1])
                    {
                        step = -3;
                    }
                    else
                    {
                        step = 1;
                    }
                    int fromDiagonal = cost[r - 1, c - 1] + step;

                    // Pick the cheapest move. Strict comparisons keep the tie-break order:
                    // diagonal first, then left, then top.
                    int best = fromDiagonal;
                    directions origin = directions.DIAGONAL;
                    if (fromLeft < best)
                    {
                        best = fromLeft;
                        origin = directions.LEFT;
                    }
                    if (fromTop < best)
                    {
                        best = fromTop;
                        origin = directions.TOP;
                    }
                    cost[r, c] = best;
                    cameFrom[r, c] = origin;

                    // The bottom-right cell is only visited when it lies inside the band.
                    reachedLastCell = reachedLastCell || (r == lenA && c == lenB);
                }

                // Once the row index has passed the band distance, the band's left edge
                // shifts one column to the right for the next row.
                if (r > distance)
                {
                    firstColumn++;
                }
            }

            // The band never reached the bottom-right cell: report that no alignment exists.
            if (!reachedLastCell)
            {
                score = int.MaxValue;
                alignment[0] = "No Alignment Possible";
                alignment[1] = "No Alignment Possible";
                return;
            }

            // The alignment score is the value of the bottom-right cell; the alignment strings
            // are rebuilt from the recorded directions.
            score = cost[lenA, lenB];
            createAlignments(ref alignment, ref cameFrom, ref sequenceA, ref sequenceB, ref lenA, ref lenB);
        }
Esempio n. 44
0
 private void initialize(GeneSequence sequenceA, GeneSequence sequenceB)
 {
     // Convert both sequences to character arrays after passing them through formatSequence
     // together with MaxCharactersToAlign (presumably the helper truncates to that many
     // characters; confirm against formatSequence).
     charA = formatSequence(sequenceA.Sequence, MaxCharactersToAlign).ToCharArray();
     charB = formatSequence(sequenceB.Sequence, MaxCharactersToAlign).ToCharArray();

     // One cell per character of B plus one for the empty prefix; cell k starts at the cost
     // of k INDEL operations.
     int cellCount = charB.Length + 1;
     resultRow = new int[cellCount];
     for (int col = 0; col < cellCount; ++col)
     {
         resultRow[col] = col * INDEL;
     }
 }
Esempio n. 45
0
        /////////////////////////////////////////////////////////////////////////////////////////////////////////////////
        /////////////////////////////////////////////////////////////////////////////////////////////////////////////////
        /////////////////////////////////////////////////////////////////////////////////////////////////////////////////

        /////////////////////////////////////////////////////////////////////////////////////////////////////////////////
        ///////////////////////////////////////// Unrestricted Algorithm ////////////////////////////////////////////////
        /////////////////////////////////////////////////////////////////////////////////////////////////////////////////
        /**
        * This function performs the unrestricted algorithm on the two sequences using dynamic programming to come up with
        * the best alignment for both.
        * Time Complexity: O(nm) where n is the length of the first sequence and m is the length of the second sequence. This
        *                   is because the algorithm iterates over all cells in the array of n x m
        * Space Complexity: O(nm) where n is the length of the first sequence and m is the length of the second sequence. This
        *                   is because the algorithm creates an array of n x m 
        */
        void unrestrictedAlgorithm (ref int score, ref string[] alignment, ref GeneSequence sequenceA, ref GeneSequence sequenceB)
        {
            // Only the first MaxCharactersToAlign characters of each sequence are aligned.
            int lenA = Math.Min(sequenceA.Sequence.Length, MaxCharactersToAlign);
            int lenB = Math.Min(sequenceB.Sequence.Length, MaxCharactersToAlign);

            // cost[r, c] holds the best score for aligning the first r letters of A with the
            // first c letters of B; cameFrom[r, c] records which neighbour produced it.
            int[,] cost = new int[lenA + 1, lenB + 1];
            directions[,] cameFrom = new directions[lenA + 1, lenB + 1];

            // Row 0 and column 0 are prepared by the helper (called with its banded flag set to false).
            fillStartCells(ref cost, ref cameFrom, lenA, lenB, false);

            // Fill every remaining cell, row by row.
            for (int r = 1; r <= lenA; r++)
            {
                for (int c = 1; c <= lenB; c++)
                {
                    // A gap from above or from the left costs 5.
                    int fromTop = cost[r - 1, c] + 5;
                    int fromLeft = cost[r, c - 1] + 5;

                    // Diagonal move: -3 when the two letters match, +1 when they differ.
                    int step;
                    if (sequenceA.Sequence[r - 1] == sequenceB.Sequence[c - 1])
                    {
                        step = -3;
                    }
                    else
                    {
                        step = 1;
                    }
                    int fromDiagonal = cost[r - 1, c - 1] + step;

                    // Pick the cheapest move. Strict comparisons keep the tie-break order:
                    // diagonal first, then left, then top.
                    int best = fromDiagonal;
                    directions origin = directions.DIAGONAL;
                    if (fromLeft < best)
                    {
                        best = fromLeft;
                        origin = directions.LEFT;
                    }
                    if (fromTop < best)
                    {
                        best = fromTop;
                        origin = directions.TOP;
                    }
                    cost[r, c] = best;
                    cameFrom[r, c] = origin;
                }
            }

            // The alignment score is the value of the bottom-right cell.
            score = cost[lenA, lenB];

            // Rebuild the two alignment strings from the recorded directions.
            createAlignments(ref alignment, ref cameFrom, ref sequenceA, ref sequenceB, ref lenA, ref lenB);
        }
Esempio n. 46
0
 /// <summary>
 /// this is the function you implement.
 /// </summary>
 /// <param name="sequenceA">the first sequence</param>
 /// <param name="sequenceB">the second sequence, may have length not equal to the length of the first seq.</param>
 /// <param name="resultTableSoFar">the table of alignment results that has been generated so far using pair-wise alignment</param>
 /// <param name="rowInTable">this particular alignment problem will occupy a cell in this row of the result table.</param>
 /// <param name="columnInTable">this particular alignment will occupy a cell in this column of the result table.</param>
 /// <returns>the alignment score for sequenceA and sequenceB.  The calling function places the result in entry rowInTable,columnInTable
 /// of the ResultTable</returns>
 public int Align(GeneSequence sequenceA, GeneSequence sequenceB, ResultTable resultTableSoFar, int rowInTable, int columnInTable)
 {
     // Placeholder score: the absolute difference between the two sequence lengths.
     // resultTableSoFar, rowInTable and columnInTable are not used by this placeholder.
     int lengthA = sequenceA.Sequence.Length;
     int lengthB = sequenceB.Sequence.Length;
     return Math.Abs(lengthA - lengthB);
 }
Esempio n. 47
0
        /////////////////////////////////////////////////////////////////////////////////////////////////////////////////
        /////////////////////////////////////////////////////////////////////////////////////////////////////////////////
        /////////////////////////////////////////////////////////////////////////////////////////////////////////////////
        /// <summary>
        /// this is the function you implement.
        /// </summary>
        /// <param name="sequenceA">the first sequence</param>
        /// <param name="sequenceB">the second sequence, may have length not equal to the length of the first seq.</param>
        /// <param name="banded">true if alignment should be band limited.</param>
        /// <returns>the alignment score and the alignment (in a Result object) for sequenceA and sequenceB.  The calling function places the result in the display appropriately.
        /// </returns>
        public ResultTable.Result Align_And_Extract(GeneSequence sequenceA, GeneSequence sequenceB, bool banded)
        {
            ResultTable.Result result = new ResultTable.Result();

            // Defaults that the selected algorithm overwrites through its ref parameters.
            int score = 0;
            string[] alignment = new string[] { "", "" };

            // Run the band-limited variant when requested, the full-table variant otherwise.
            if (banded)
            {
                bandedAlgorithm(ref score, ref alignment, ref sequenceA, ref sequenceB);
            }
            else
            {
                unrestrictedAlgorithm(ref score, ref alignment, ref sequenceA, ref sequenceB);
            }

            // Bundle the score and both alignment strings into the result object.
            result.Update(score, alignment[0], alignment[1]);
            return result;
        }