Beispiel #1
0
        /// <summary>
        /// Builds the form, loads the gene sequences from <c>GENOME_FILE</c> and
        /// creates the result table.
        /// </summary>
        /// <remarks>
        /// The file is requested with the prefixes "../../" and "../" first and, if
        /// both raise <see cref="FileNotFoundException"/>, with no prefix at all.
        /// A failure of that last attempt is not handled here and propagates to
        /// the caller.
        /// </remarks>
        public MainForm()
        {
            InitializeComponent();

            statusMessage.Text = "Loading Database...";

            // Prefixes probed, in order, before falling back to the bare file name.
            string[] fallbackPrefixes = { "../../", "../" };
            bool loaded = false;

            foreach (string prefix in fallbackPrefixes)
            {
                try
                {
                    m_sequences = loadFile(prefix + GENOME_FILE);
                    loaded = true;
                    break;
                }
                catch (FileNotFoundException)
                {
                    // Not found under this prefix; move on to the next candidate.
                }
            }

            if (!loaded)
            {
                // Last resort: same folder. Intentionally unguarded so that a
                // missing file still surfaces as an exception.
                m_sequences = loadFile(GENOME_FILE);
            }

            m_resultTable = new ResultTable(this.dataGridViewResults, NUMBER_OF_SEQUENCES);

            statusMessage.Text = "Loaded Database.";
        }
Beispiel #2
0
        /// <summary>
        /// Builds the form, loads the gene sequences from <c>GENOME_FILE</c> and
        /// creates the result table.
        /// </summary>
        /// <remarks>
        /// Three locations are tried in turn: "../../", "../" and the bare file
        /// name. Only the first two attempts are guarded; a
        /// <see cref="FileNotFoundException"/> from the third one propagates.
        /// </remarks>
        public MainForm()
        {
            InitializeComponent();

            statusMessage.Text = "Loading Database...";

            // The catch clauses below carry no variable: the exception object is
            // never inspected, and naming it only produced "unused variable" warnings.
            try
            {
                m_sequences = loadFile("../../" + GENOME_FILE);
            }
            catch (FileNotFoundException)
            {
                try // Failed, try one level down...
                {
                    m_sequences = loadFile("../" + GENOME_FILE);
                }
                catch (FileNotFoundException)
                {
                    // Failed, try same folder (unguarded on purpose)
                    m_sequences = loadFile(GENOME_FILE);
                }
            }

            m_resultTable = new ResultTable(this.dataGridViewResults, NUMBER_OF_SEQUENCES);

            statusMessage.Text = "Loaded Database.";
        }
        /// <summary>
        /// Computes the alignment cost of two sequences, filling a cost table row by
        /// row while keeping only the previous row (<c>prev</c>) and the row being
        /// written (<c>results</c>).
        /// </summary>
        /// <param name="sequenceA">the first sequence</param>
        /// <param name="sequenceB">the second sequence; its length may differ from that of the first</param>
        /// <param name="resultTableSoFar">results produced so far; not used by this implementation</param>
        /// <param name="rowInTable">row of the result table this score belongs to; not used here</param>
        /// <param name="columnInTable">column of the result table this score belongs to; not used here</param>
        /// <returns>the last entry of the final row, truncated to <c>int</c></returns>
        public int Score(GeneSequence sequenceA, GeneSequence sequenceB, ResultTable resultTableSoFar, int rowInTable, int columnInTable)
        {
            // NOTE(review): initialize presumably sets X, Y, width, height, prev and
            // results from the two sequences - it is not visible here, confirm.
            initialize(sequenceA, sequenceB);

            for (int row = 1; row < height; row++)
            {
                for (int col = 1; col < width; col++)
                {
                    bool sameSymbol = X[col - 1] == Y[row - 1];

                    // Cost of reaching this cell from each of its three neighbours.
                    double fromAbove = prev[col] + InsertDeleteCost;
                    double fromLeft = results[col - 1] + InsertDeleteCost;
                    double fromDiagonal = sameSymbol
                        ? prev[col - 1] + CharsMatchCost
                        : prev[col - 1] + SubstitutionCost;

                    results[col] = Math.Min(fromAbove, Math.Min(fromDiagonal, fromLeft));
                }

                currentRow++;
                // NOTE(review): SwapArrays is expected to exchange prev and results so
                // the row just written is read as "previous" next time - confirm.
                SwapArrays();
            }

            return (int)prev[width - 1];
        }
        /// <summary>
        /// Scores the alignment of two sequences by delegating to a <c>Grid</c>.
        /// </summary>
        /// <param name="sequenceA">the first sequence</param>
        /// <param name="sequenceB">the second sequence; its length may differ from that of the first</param>
        /// <param name="resultTableSoFar">results produced so far; not used by this implementation</param>
        /// <param name="rowInTable">row of the result table this alignment belongs to</param>
        /// <param name="columnInTable">column of the result table this alignment belongs to</param>
        /// <returns>
        /// 0 when <c>columnInTable - rowInTable</c> is negative; otherwise the value
        /// returned by <c>Grid.CalculateScoreSolution</c>. The caller stores the result
        /// at (rowInTable, columnInTable).
        /// </returns>
        public int Align(GeneSequence sequenceA, GeneSequence sequenceB, ResultTable resultTableSoFar, int rowInTable, int columnInTable)
        {
            int diagonalOffset = columnInTable - rowInTable;

            if (diagonalOffset >= 0)
            {
                Grid scoringGrid = new Grid(sequenceA.Sequence, sequenceB.Sequence, true, MaxCharactersToAlign);
                return scoringGrid.CalculateScoreSolution();
            }

            // Cells with a negative offset are not computed.
            return 0;
        }
Beispiel #5
0
        /// <summary>
        /// Computes the alignment cost of two sequences with a two-row
        /// dynamic-programming table (indel 5, match -3, substitution 1).
        /// </summary>
        /// <param name="sequenceA">the first sequence</param>
        /// <param name="sequenceB">the second sequence; its length may differ from that of the first</param>
        /// <param name="resultTableSoFar">results produced so far; not used by this implementation</param>
        /// <param name="rowInTable">row of the result table this alignment belongs to</param>
        /// <param name="columnInTable">column of the result table this alignment belongs to</param>
        /// <returns>
        /// 0 when <c>rowInTable &lt;= columnInTable</c>; otherwise the cost of aligning
        /// the first <c>MaxCharactersToAlign</c> characters of both sequences. The
        /// caller stores the result at (rowInTable, columnInTable).
        /// </returns>
        public int Align(GeneSequence sequenceA, GeneSequence sequenceB, ResultTable resultTableSoFar, int rowInTable, int columnInTable)
        {
            // Cells on the diagonal or with row < column are skipped.
            if (rowInTable <= columnInTable)
            {
                return 0;
            }

            const int gapPenalty = 5;
            const int matchScore = -3;
            const int mismatchPenalty = 1;

            string first = sequenceA.Sequence;
            string second = sequenceB.Sequence;
            int rowCount = Math.Min(first.Length, MaxCharactersToAlign);
            int columnCount = Math.Min(second.Length, MaxCharactersToAlign);

            // Two reusable rows; they trade places after every pass.
            int[] above = new int[MaxCharactersToAlign + 1];
            int[] current = new int[MaxCharactersToAlign + 1];

            // Row 0: aligning a prefix of the second sequence against nothing.
            for (int col = 0; col <= columnCount; col++)
            {
                above[col] = gapPenalty * col;
            }

            for (int row = 1; row <= rowCount; row++)
            {
                // Column 0 can only be reached from the cell above.
                current[0] = above[0] + gapPenalty;

                for (int col = 1; col <= columnCount; col++)
                {
                    int viaGap = Math.Min(current[col - 1] + gapPenalty, above[col] + gapPenalty);
                    int diagonalStep = (first[row - 1] == second[col - 1]) ? matchScore : mismatchPenalty;

                    current[col] = Math.Min(viaGap, above[col - 1] + diagonalStep);
                }

                // The row just filled becomes "above"; the old one is overwritten next.
                int[] recycled = above;
                above = current;
                current = recycled;
            }

            // After the final swap the last filled row is "above".
            return above[columnCount];
        }
Beispiel #6
0
 /// <summary>
 /// Builds the form, connects to the sequence database, reads the gene
 /// sequences and prepares the result table and the aligner.
 /// </summary>
 public MainForm()
 {
     const string databasePath = "../../db1.mdb";
     // Set the number of Sequences to load here.
     const int sequencesToLoad = 10;

     InitializeComponent();

     m_dbController = new DatabaseController();
     m_dbController.EstablishConnection(databasePath);

     statusMessage.Text = "Loading Database...";
     m_sequences = m_dbController.ReadGeneSequences(sequencesToLoad);
     // One row/column per sequence that was actually read.
     m_resultTable = new ResultTable(this.dataGridViewResults, m_sequences.Length);
     statusMessage.Text = "Loaded Database.";

     processor = new PairWiseAlign();
 }
Beispiel #7
0
 /// <summary>
 /// Initializes the form: opens "../../db1.mdb" through a
 /// <c>DatabaseController</c>, reads 10 gene sequences, sizes the result table
 /// to the number of sequences returned and creates the aligner.
 /// </summary>
 public MainForm()
 {
     InitializeComponent();

     // Database access goes through a controller kept for the form's lifetime.
     m_dbController = new DatabaseController();
     m_dbController.EstablishConnection("../../db1.mdb");

     statusMessage.Text = "Loading Database...";

     // Set the number of Sequences to load below.
     m_sequences = m_dbController.ReadGeneSequences(10);

     int sequenceCount = m_sequences.Length;
     m_resultTable = new ResultTable(this.dataGridViewResults, sequenceCount);

     statusMessage.Text = "Loaded Database.";

     processor = new PairWiseAlign();
 }
Beispiel #8
0
        /// <summary>
        /// Builds the form and loads the gene sequences from <c>GENOME_FILE</c>.
        /// </summary>
        /// <remarks>
        /// The file is requested under "../../", then "../", then with no prefix.
        /// If the last attempt fails for any reason, the status bar reports the
        /// failure and the process button is disabled; otherwise the result table
        /// is created with the names of the first <c>NUMBER_OF_SEQUENCES</c> sequences.
        /// </remarks>
        public MainForm()
        {
            InitializeComponent();

            statusMessage.Text = "Loading Database...";

            // Parent-relative locations; only "file not found" moves on to the next one.
            string[] parentCandidates = { "../../" + GENOME_FILE, "../" + GENOME_FILE };
            bool loaded = false;

            foreach (string candidate in parentCandidates)
            {
                try
                {
                    m_sequences = loadFile(candidate);
                    loaded = true;
                    break;
                }
                catch (FileNotFoundException)
                {
                    // Try the next location.
                }
            }

            if (!loaded)
            {
                try // Last resort: same folder
                {
                    m_sequences = loadFile(GENOME_FILE);
                }
                catch
                {
                    // Any failure here is reported as "not found" and leaves the
                    // form without data, so processing is switched off.
                    statusMessage.Text = "Failed to load database: " + GENOME_FILE + " not found.";
                    Refresh();
                    processButton.Enabled = false;
                    return;
                }
            }

            // Column/row headers for the result table come from the sequence names.
            string[] names = new string[NUMBER_OF_SEQUENCES];
            for (int index = 0; index < NUMBER_OF_SEQUENCES; index++)
            {
                names[index] = m_sequences[index].Name;
            }

            m_resultTable = new ResultTable(this.dataGridViewResults, names);
            statusMessage.Text = "Loaded Database.";
        }
Beispiel #9
0
        /// <summary>
        /// Builds the form, tries to load the gene sequences from
        /// <c>GENOME_FILE</c> and, on success, creates the result table labelled
        /// with the sequence names.
        /// </summary>
        /// <remarks>
        /// Lookup order: "../../", "../", then no prefix. When the last attempt
        /// throws, the status bar shows a failure message and the process button
        /// is disabled.
        /// </remarks>
        public MainForm()
        {
            bool loaded = true;

            InitializeComponent();

            statusMessage.Text = "Loading Database...";

            try
            {
                m_sequences = loadFile("../../" + GENOME_FILE);
            }
            catch (FileNotFoundException)
            {
                // Two levels up failed; try one level up.
                try
                {
                    m_sequences = loadFile("../" + GENOME_FILE);
                }
                catch (FileNotFoundException)
                {
                    // One level up failed as well; try the same folder.
                    try
                    {
                        m_sequences = loadFile(GENOME_FILE);
                    }
                    catch
                    {
                        // Whatever went wrong, it is reported as a missing file.
                        statusMessage.Text = "Failed to load database: " + GENOME_FILE + " not found.";
                        Refresh();
                        loaded = false;
                    }
                }
            }

            if (!loaded)
            {
                // Nothing to process without sequences.
                processButton.Enabled = false;
                return;
            }

            string[] names = new string[NUMBER_OF_SEQUENCES];
            int position = 0;
            while (position < NUMBER_OF_SEQUENCES)
            {
                names[position] = m_sequences[position].Name;
                position++;
            }

            m_resultTable = new ResultTable(this.dataGridViewResults, names);
            statusMessage.Text = "Loaded Database.";
        }
        // NOTE: a memoization cache (Dictionary<string, int> keyed on the two
        // concatenated sequences) was sketched here earlier and is currently disabled.

        /// <summary>
        /// Computes the alignment score of two sequences by building the score
        /// table one row at a time, keeping only the most recent row.
        /// </summary>
        /// <param name="sequenceA">the first sequence</param>
        /// <param name="sequenceB">the second sequence; its length may differ from that of the first</param>
        /// <param name="resultTableSoFar">results produced so far; not used by this implementation</param>
        /// <param name="rowInTable">row of the result table this alignment belongs to; not used here</param>
        /// <param name="columnInTable">column of the result table this alignment belongs to; not used here</param>
        /// <returns>the last entry of the final row. The caller stores it at (rowInTable, columnInTable).</returns>
        public int Align(GeneSequence sequenceA, GeneSequence sequenceB, ResultTable resultTableSoFar, int rowInTable, int columnInTable)
        {
            // NOTE(review): initializeSequencing presumably fills charA, charB and the
            // starting resultRow from the two sequences - not visible here, confirm.
            initializeSequencing(sequenceA, sequenceB);

            // One new row per symbol of the first sequence. The original author notes
            // createNextRow as O(n), giving O(n^2) time and O(n) space overall; that
            // depends on createNextRow, which is defined elsewhere.
            int position = 0;
            while (position < charA.Length)
            {
                resultRow = createNextRow(resultRow, charA[position], charB);
                position++;
            }

            // The score is the bottom-right cell, i.e. the end of the last row.
            int lastColumn = resultRow.Length - 1;
            return resultRow[lastColumn];
        }
        /// <summary>
        /// Aligns two sequences and returns the final cell of the alignment table
        /// as a <c>GeneNode</c> linked back to the cells it was reached from.
        /// </summary>
        /// <param name="sequenceA">the first sequence</param>
        /// <param name="sequenceB">the second sequence; its length may differ from that of the first</param>
        /// <param name="resultTableSoFar">results produced so far; not used by this implementation</param>
        /// <param name="rowInTable">row of the result table this alignment belongs to</param>
        /// <param name="columnInTable">column of the result table this alignment belongs to</param>
        /// <returns>
        /// a zero-cost node without predecessor when <c>rowInTable == columnInTable</c>;
        /// otherwise the node of the bottom-right cell of the table
        /// </returns>
        public GeneNode Align(GeneSequence sequenceA, GeneSequence sequenceB, ResultTable resultTableSoFar, int rowInTable, int columnInTable)
        {
            // A sequence paired with itself is not aligned.
            if (rowInTable == columnInTable)
            {
                return new GeneNode(null, '0', '0', 0);
            }

            // A leading placeholder makes index 0 stand for "nothing consumed yet".
            string seqA = "0" + sequenceA.Sequence;
            string seqB = "0" + sequenceB.Sequence;

            // Cap both dimensions (placeholder included).
            int limit = MaxCharactersToAlign + 1;
            int rowCount = seqA.Length > limit ? limit : seqA.Length;
            int columnCount = seqB.Length > limit ? limit : seqB.Length;

            // Only the row above the one being filled is kept.
            List<GeneNode> rowAbove = new List<GeneNode>();

            for (int row = 0; row < rowCount; row++)
            {
                List<GeneNode> rowBelow = new List<GeneNode>();

                for (int col = 0; col < columnCount; col++)
                {
                    GeneNode cell;

                    if (row == 0)
                    {
                        if (col == 0)
                        {
                            // Origin of the table.
                            cell = new GeneNode(null, '0', '0', 0);
                        }
                        else
                        {
                            // First row: reachable only from the left (indel).
                            GeneNode leftNeighbour = rowBelow[col - 1];
                            cell = new GeneNode(leftNeighbour, '-', seqB[col], leftNeighbour.Cost + indel);
                        }
                    }
                    else if (col == 0)
                    {
                        // First column: reachable only from above (indel).
                        GeneNode upperNeighbour = rowAbove[col];
                        cell = new GeneNode(upperNeighbour, seqA[row], '-', upperNeighbour.Cost + indel);
                    }
                    else
                    {
                        // Interior cell: two indel candidates plus a diagonal step that
                        // is priced as a match or as a substitution.
                        GeneNode top = new GeneNode(rowAbove[col], seqA[row], '-', rowAbove[col].Cost + indel);
                        GeneNode left = new GeneNode(rowBelow[col - 1], '-', seqB[col], rowBelow[col - 1].Cost + indel);
                        GeneNode diag = (seqA[row] == seqB[col])
                            ? new GeneNode(rowAbove[col - 1], seqA[row], seqB[col], rowAbove[col - 1].Cost + match)
                            : new GeneNode(rowAbove[col - 1], seqA[row], seqB[col], rowAbove[col - 1].Cost + sub);

                        // Candidates are handed over in the order left, top, diag.
                        cell = GetSmallest(left, top, diag);
                    }

                    rowBelow.Add(cell);
                }

                // The finished row becomes the reference row for the next pass.
                rowAbove = rowBelow;
            }

            // Bottom-right cell of the table.
            return rowAbove[rowAbove.Count - 1];
        }
        /// <summary>
        /// Computes the alignment cost of two sequences, keeping two rows of the
        /// cost table at a time and storing finished results in a cache.
        /// </summary>
        /// <param name="sequenceA">the first sequence</param>
        /// <param name="sequenceB">the second sequence; its length may differ from that of the first</param>
        /// <param name="resultTableSoFar">results produced so far; not used by this implementation</param>
        /// <param name="rowInTable">row of the result table this alignment belongs to</param>
        /// <param name="columnInTable">column of the result table this alignment belongs to</param>
        /// <returns>
        /// 0 when <c>rowInTable == columnInTable</c>; the cached value when one exists
        /// for this cell; otherwise the freshly computed cost, which is also cached
        /// for (row, column) and (column, row). The caller stores the result at
        /// (rowInTable, columnInTable).
        /// </returns>
        public int Align(GeneSequence sequenceA, GeneSequence sequenceB, ResultTable resultTableSoFar, int rowInTable, int columnInTable)
        {
            // A sequence paired with itself is not aligned.
            if (rowInTable == columnInTable)
            {
                return 0;
            }

            // Reuse an earlier result for this cell when there is one.
            if (InCache(rowInTable, columnInTable))
            {
                return cache[rowInTable][columnInTable];
            }

            // Only two rows are alive at any time.
            resultSet1 = new List<int>(); // row above
            resultSet2 = new List<int>(); // row being filled

            // A leading placeholder makes index 0 stand for "nothing consumed yet".
            string seqA = "0" + sequenceA.Sequence;
            string seqB = "0" + sequenceB.Sequence;

            // Cap both dimensions (placeholder included).
            int limit = MaxCharactersToAlign + 1;
            int seqALength = seqA.Length > limit ? limit : seqA.Length;
            int seqBLength = seqB.Length > limit ? limit : seqB.Length;

            for (int row = 0; row < seqALength; row++)
            {
                for (int col = 0; col < seqBLength; col++)
                {
                    int cost;

                    if (row == 0)
                    {
                        // First row: the origin costs nothing, every other cell is
                        // an indel away from its left neighbour.
                        cost = (col == 0) ? 0 : resultSet2[col - 1] + indel;
                    }
                    else if (col == 0)
                    {
                        // First column: an indel away from the cell above.
                        cost = resultSet1[col] + indel;
                    }
                    else
                    {
                        // Interior cell: indel from above or from the left, or a
                        // diagonal step priced as match or substitution.
                        int top = resultSet1[col] + indel;
                        int left = resultSet2[col - 1] + indel;
                        int diag = (seqA[row] == seqB[col])
                            ? resultSet1[col - 1] + match
                            : resultSet1[col - 1] + sub;

                        // Candidates are handed over in the order top, left, diag.
                        cost = GetSmallest(top, left, diag);
                    }

                    resultSet2.Add(cost);
                }

                // The finished row becomes the upper row; start a fresh lower row.
                resultSet1 = resultSet2;
                resultSet2 = new List<int>();
            }

            // Bottom-right cell of the table.
            int result = resultSet1[resultSet1.Count - 1];

            // Store the score for both orientations of the pair.
            AddToCache(rowInTable, columnInTable, result);
            AddToCache(columnInTable, rowInTable, result);

            return result;
        }
Beispiel #13
0
 /// <summary>
 /// Placeholder scoring: returns the absolute difference between the lengths
 /// of the two sequences instead of a real alignment score.
 /// </summary>
 /// <param name="sequenceA">the first sequence</param>
 /// <param name="sequenceB">the second sequence; its length may differ from that of the first</param>
 /// <param name="resultTableSoFar">results produced so far; not used by this placeholder</param>
 /// <param name="rowInTable">row of the result table this alignment belongs to; not used here</param>
 /// <param name="columnInTable">column of the result table this alignment belongs to; not used here</param>
 /// <returns>|length of A - length of B|. The caller stores it at (rowInTable, columnInTable).</returns>
 public int Align(GeneSequence sequenceA, GeneSequence sequenceB, ResultTable resultTableSoFar, int rowInTable, int columnInTable)
 {
     // Stand-in computation; a real alignment algorithm is meant to replace it.
     int lengthOfA = sequenceA.Sequence.Length;
     int lengthOfB = sequenceB.Sequence.Length;

     return Math.Abs(lengthOfA - lengthOfB);
 }
Beispiel #14
0
        /// <summary>
        /// Computes the alignment score of two sequences (indel 5, match -3,
        /// substitution 1) using two rows of the score table.
        /// </summary>
        /// <param name="sequenceA">the first sequence; only its first 5000 characters are used</param>
        /// <param name="sequenceB">the second sequence; only its first 5000 characters are used</param>
        /// <param name="resultTableSoFar">results produced so far; not used by this implementation</param>
        /// <param name="rowInTable">row of the result table this alignment belongs to; not used here</param>
        /// <param name="columnInTable">column of the result table this alignment belongs to; not used here</param>
        /// <returns>the score in the bottom-right cell of the table. The caller stores it at (rowInTable, columnInTable).</returns>
        public int Align(GeneSequence sequenceA, GeneSequence sequenceB, ResultTable resultTableSoFar, int rowInTable, int columnInTable)
        {
            const int maxLength = 5000;
            const int gapCost = 5;

            // Truncate both inputs to the first maxLength characters.
            string rawA = sequenceA.Sequence;
            string a = rawA.Length > maxLength ? rawA.Substring(0, maxLength) : rawA;

            string rawB = sequenceB.Sequence;
            string b = rawB.Length > maxLength ? rawB.Substring(0, maxLength) : rawB;

            int columns = a.Length + 1;

            // Row 0 of the table: each column is one more indel.
            int[] above = new int[columns];
            for (int col = 0; col < columns; col++)
            {
                above[col] = col * gapCost;
            }

            // Receives the scores of the row being computed.
            int[] current = new int[columns];

            // One table row per character of b; each row costs O(columns).
            for (int row = 1; row <= b.Length; row++)
            {
                char letter = b[row - 1];

                // Column 0: 'row' indels.
                current[0] = row * gapCost;

                for (int col = 1; col < columns; col++)
                {
                    int diagonalStep = (letter == a[col - 1]) ? -3 : 1;

                    // Candidates: diagonal, from above, from the left.
                    current[col] = scoreMin(
                        diagonalStep + above[col - 1],
                        gapCost + above[col],
                        gapCost + current[col - 1]);
                }

                // Swap the two rows; the old upper row is overwritten next pass.
                int[] recycled = above;
                above = current;
                current = recycled;
            }

            // After the final swap the last computed row is "above".
            return above[a.Length];
        }
Beispiel #15
0
 /// <summary>
 /// Placeholder implementation: scores a pair of sequences by how much their
 /// lengths differ. No actual alignment is performed.
 /// </summary>
 /// <param name="sequenceA">the first sequence</param>
 /// <param name="sequenceB">the second sequence; its length may differ from that of the first</param>
 /// <param name="resultTableSoFar">results produced so far; ignored</param>
 /// <param name="rowInTable">row of the result table this alignment belongs to; ignored</param>
 /// <param name="columnInTable">column of the result table this alignment belongs to; ignored</param>
 /// <returns>the absolute length difference. The caller stores it at (rowInTable, columnInTable).</returns>
 public int Align(GeneSequence sequenceA, GeneSequence sequenceB, ResultTable resultTableSoFar, int rowInTable, int columnInTable)
 {
     // To be replaced by a real alignment algorithm.
     int lengthDifference = sequenceA.Sequence.Length - sequenceB.Sequence.Length;

     return Math.Abs(lengthDifference);
 }