Beispiel #1
0
        /// <summary>
        /// Writes the score matrix of <paramref name="MA"/> to a tab-separated text file.
        /// </summary>
        /// <remarks>
        /// The first line holds two leading tabs followed by every element of <c>MA.Protein</c>.
        /// Each following line holds one row of <c>MA.matrix</c>: the first row starts with an
        /// empty label cell, and row r (r &gt;= 1) is labelled with <c>MA.Query[r - 1]</c>.
        /// Every value, including the last one on a line, is followed by a tab.
        /// </remarks>
        /// <param name="MA">Alignment whose <c>matrix</c>, <c>Protein</c> and <c>Query</c> are written out.</param>
        /// <param name="filePath">Path handed to <see cref="StreamWriter"/>; the file is created or overwritten.</param>
        public static void PrintMatrix(MatrixAlignment MA, string filePath)
        {
            using (StreamWriter output = new StreamWriter(filePath))
            {
                // Values are streamed straight to the writer instead of being appended to a
                // growing string, so a line is never re-copied once per cell.
                output.Write("\t\t");
                foreach (var residue in MA.Protein)
                {
                    output.Write(residue);
                    output.Write('\t');
                }
                output.WriteLine();

                int rowIndex = 0;
                foreach (var row in MA.matrix)
                {
                    // Row 0 has no query label; every later row is labelled with its query residue.
                    if (rowIndex > 0)
                    {
                        output.Write(MA.Query[rowIndex - 1]);
                    }
                    output.Write('\t');

                    foreach (var cell in row)
                    {
                        output.Write(cell.MaxCost);
                        output.Write('\t');
                    }

                    output.WriteLine();
                    rowIndex++;
                }
            }
        }
Beispiel #2
0
        /// <summary>
        /// Turns a traceback path into the two aligned character rows it describes.
        /// </summary>
        /// <param name="MA">Alignment supplying the <c>Query</c> and <c>Protein</c> residues.</param>
        /// <param name="path">
        /// Steps of the form (j, i, direction), read in stack enumeration order (most recently
        /// pushed first). j indexes <c>MA.Query</c>, i indexes <c>MA.Protein</c>; direction is
        /// 0 for an up-left step, 1 for a left step and -1 for an up step.
        /// </param>
        /// <returns>
        /// Two arrays of length <c>path.Count</c>: element 0 is the query row, element 1 the
        /// protein row. '-' marks a gap. A step with any other direction value leaves both
        /// characters of its column at the default <c>char</c> value.
        /// </returns>
        public static char[][] SequenceComparison(MatrixAlignment MA, Stack<Tuple<int, int, int>> path)
        {
            char[][] aligned = new char[2][];
            aligned[0] = new char[path.Count];
            aligned[1] = new char[path.Count];

            int column = 0;

            foreach (var step in path)
            {
                switch (step.Item3)
                {
                    case 0:
                        // Up-left step: both residues are consumed.
                        aligned[0][column] = MA.Query[step.Item1];
                        aligned[1][column] = MA.Protein[step.Item2];
                        break;

                    case 1:
                        // Left step: its stored source is (j, i - 1), so only the protein residue
                        // at i - 1 is consumed and the query gets a gap. Reading MA.Query[j] here
                        // would pick the wrong residue and overrun when j equals the query length.
                        aligned[0][column] = '-';
                        aligned[1][column] = MA.Protein[step.Item2];
                        break;

                    case -1:
                        // Up step: its stored source is (j - 1, i), so only the query residue
                        // at j - 1 is consumed and the protein gets a gap.
                        aligned[0][column] = MA.Query[step.Item1];
                        aligned[1][column] = '-';
                        break;
                }

                column++;
            }

            return aligned;
        }
Beispiel #3
0
        /// <summary>
        /// Needleman–Wunsch-style fill with a restriction: only the up-left (diagonal)
        /// predecessor is considered, so no gaps are ever introduced.
        /// </summary>
        /// <remarks>
        /// Every cell (j, i) with j, i &gt;= 1 receives the score of cell (j - 1, i - 1) plus the
        /// substitution score of <c>MA.Query[j - 1]</c> against <c>MA.Protein[i - 1]</c>, and its
        /// <c>Source</c> is set to that diagonal cell with direction 0. When the protein is at
        /// least as long as the query, the last query row is then scanned from column
        /// <c>MA.QueryLength</c> onwards and the first highest-scoring cell is recorded in
        /// <c>MA.MaxScore</c> / <c>MA.MaxScorePos</c>. When the protein is shorter than the query
        /// neither of those two members is written.
        /// </remarks>
        /// <param name="MA">Alignment whose matrix is filled in place.</param>
        /// <param name="gap">Not used by this variant; kept so the signature matches <c>AlignMatrix</c>.</param>
        /// <param name="scoreTable">Substitution scores, indexed by query residue and then protein residue.</param>
        public static void AlignMatrix_UpLeft(MatrixAlignment MA, int gap, Dictionary<char, Dictionary<char, int>> scoreTable)
        {
            // Start from the smallest possible int so that any real score, however negative, is recorded.
            int bestScore = int.MinValue;

            for (int j = 1; j < MA.QueryLength + 1; j++)
            {
                for (int i = 1; i < MA.ProteinLength + 1; i++)
                {
                    int diagonal = MA.matrix[j - 1][i - 1].MaxCost + scoreTable[MA.Query[j - 1]][MA.Protein[i - 1]];

                    MA.matrix[j][i].MaxCost = diagonal;
                    MA.matrix[j][i].Source = new Tuple<int, int, int>(j - 1, i - 1, 0);
                }
            }

            if (MA.ProteinLength >= MA.QueryLength)
            {
                // Columns before QueryLength are skipped in the last row.
                for (int i = MA.QueryLength; i < MA.ProteinLength + 1; i++)
                {
                    int candidate = MA.matrix[MA.QueryLength][i].MaxCost;
                    if (candidate > bestScore)
                    {
                        bestScore = candidate;
                        MA.MaxScore = bestScore;
                        MA.MaxScorePos = new Tuple<int, int>(MA.QueryLength, i);
                    }
                }
            }
        }
Beispiel #4
0
        /// <summary>
        /// Follows the stored <c>Source</c> links back from <c>MA.MaxScorePos</c> and collects them.
        /// </summary>
        /// <remarks>
        /// Each pushed entry is (row j, column i, direction) copied from the <c>Source</c> of the
        /// cell being left; direction is 0 for up-left, 1 for left and -1 for up. Walking stops
        /// as soon as the row or column index reaches 0 or the current cell's <c>MaxCost</c> is
        /// not positive. Nothing is collected when <c>MA.MaxScore</c> is not positive.
        /// </remarks>
        /// <param name="MA">Alignment whose matrix has already been filled.</param>
        /// <returns>The collected steps; the last step pushed is on top of the stack.</returns>
        public static Stack<Tuple<int, int, int>> Traceback(MatrixAlignment MA)
        {
            var steps = new Stack<Tuple<int, int, int>>();

            // Without a positive best score there is nothing to trace.
            if (MA.MaxScore <= 0)
            {
                return steps;
            }

            int row = MA.MaxScorePos.Item1;
            int col = MA.MaxScorePos.Item2;

            while (row > 0 && col > 0 && MA.matrix[row][col].MaxCost > 0)
            {
                var origin = MA.matrix[row][col].Source;

                steps.Push(new Tuple<int, int, int>(origin.Item1, origin.Item2, origin.Item3));

                row = origin.Item1;
                col = origin.Item2;
            }

            return steps;
        }
Beispiel #5
0
        /// <summary>
        /// Smith–Waterman fill: every cell takes the best of the up-left, left and up
        /// candidates, each floored at zero.
        /// </summary>
        /// <remarks>
        /// The up-left candidate adds the substitution score of <c>MA.Query[j - 1]</c> against
        /// <c>MA.Protein[i - 1]</c>; the left and up candidates subtract <paramref name="gap"/>.
        /// On equal scores the preference is up-left, then left, then up. <c>MA.MaxScore</c> and
        /// <c>MA.MaxScorePos</c> are updated only when the up-left candidate wins a cell and is
        /// at least as large as the best score seen so far, so the last such cell is kept.
        /// </remarks>
        /// <param name="MA">Alignment whose matrix is filled in place.</param>
        /// <param name="gap">Penalty subtracted for a left or an up step.</param>
        /// <param name="scoreTable">Substitution scores, indexed by query residue and then protein residue.</param>
        public static void AlignMatrix(MatrixAlignment MA, int gap, Dictionary<char, Dictionary<char, int>> scoreTable)
        {
            int bestScore = 0;

            for (int j = 1; j < MA.QueryLength + 1; j++)
            {
                for (int i = 1; i < MA.ProteinLength + 1; i++)
                {
                    int diagonal = Math.Max(0, MA.matrix[j - 1][i - 1].MaxCost + scoreTable[MA.Query[j - 1]][MA.Protein[i - 1]]);
                    int left = Math.Max(0, MA.matrix[j][i - 1].MaxCost - gap);
                    // The gap penalty must be charged on the up step as well, exactly like the left step.
                    int up = Math.Max(0, MA.matrix[j - 1][i].MaxCost - gap);

                    // If scores are equal, prefer up-left over left over up.
                    if (diagonal >= left && diagonal >= up)
                    {
                        MA.matrix[j][i].MaxCost = diagonal;
                        MA.matrix[j][i].Source = new Tuple<int, int, int>(j - 1, i - 1, 0);

                        if (diagonal >= bestScore)
                        {
                            bestScore = diagonal;
                            MA.MaxScore = diagonal;
                            MA.MaxScorePos = new Tuple<int, int>(j, i);
                        }
                    }
                    else if (left > diagonal && left >= up)
                    {
                        MA.matrix[j][i].MaxCost = left;
                        MA.matrix[j][i].Source = new Tuple<int, int, int>(j, i - 1, 1);
                    }
                    else
                    {
                        // Only reachable when up is strictly greater than both other candidates.
                        MA.matrix[j][i].MaxCost = up;
                        MA.matrix[j][i].Source = new Tuple<int, int, int>(j - 1, i, -1);
                    }
                }
            }
        }
Beispiel #6
0
        /// <summary>
        /// Aligns every query against every protein, ranks proteins by their summed score and
        /// writes the best candidates to a file next to the query file.
        /// </summary>
        /// <remarks>
        /// <paramref name="matrixId"/> 0 selects <c>MatrixAlignment.AlignMatrix</c> and 1 selects
        /// <c>MatrixAlignment.AlignMatrix_UpLeft</c>; for any other value no fill routine is
        /// called before the score is read. At most 20 targets are written, to
        /// "target_matrix" + matrixId + ".tsv" in the directory of <paramref name="query_path"/>.
        /// </remarks>
        /// <param name="db_path">Path passed to <c>DataBaseManipulation.LoadProteinDb</c>.</param>
        /// <param name="query_path">Path passed to <c>DataBaseManipulation.LoadQueryMap</c>; its directory receives the output file.</param>
        /// <param name="matrixId">Selects the fill routine and is embedded in the output file name.</param>
        public void Run(string db_path, string query_path, int matrixId)
        {
            var queryMap = DataBaseManipulation.LoadQueryMap(query_path);
            var degenerateQueries = DataBaseManipulation.DegenerateQueryMap(queryMap);

            var proteinDb = DataBaseManipulation.LoadProteinDb(db_path, true, DecoyType.None, Parameters.MaxThreadsToUse);
            var degenerateProteins = DataBaseManipulation.DegenerateProteinDa(proteinDb, Parameters.MaxThreadsToUse);

            // scores[q][p] and matchedSequences[q][p] belong to query q and protein p.
            int[][] scores = new int[degenerateQueries.Count][];
            string[][] matchedSequences = new string[degenerateQueries.Count][];

            int gapPenalty = 32767;

            for (int q = 0; q < degenerateQueries.Count; q++)
            {
                int[] scoreRow = new int[degenerateProteins.Length];
                string[] sequenceRow = new string[degenerateProteins.Length];
                scores[q] = scoreRow;
                matchedSequences[q] = sequenceRow;

                var query = degenerateQueries[q];
                int[] workerIds = Enumerable.Range(0, Parameters.MaxThreadsToUse).ToArray();

                // TODO: the MatrixAlignment could be optimized: create one large MatrixAlignment
                // and clear its content each time to reduce memory usage.

                // Worker w handles proteins w, w + MaxThreadsToUse, w + 2 * MaxThreadsToUse, ...
                Parallel.ForEach(workerIds, workerId =>
                {
                    for (int p = workerId; p < degenerateProteins.Length; p += Parameters.MaxThreadsToUse)
                    {
                        var alignment = new MatrixAlignment(degenerateProteins[p].BaseSequence, query);

                        switch (matrixId)
                        {
                            case 0:
                                MatrixAlignment.AlignMatrix(alignment, gapPenalty, MatrixAlignment.ScoreTable);
                                break;
                            case 1:
                                MatrixAlignment.AlignMatrix_UpLeft(alignment, gapPenalty, MatrixAlignment.ScoreTable);
                                break;
                        }

                        scoreRow[p] = alignment.MaxScore;

                        var tracePath = MatrixAlignment.Traceback(alignment);
                        var alignedRows = MatrixAlignment.SequenceComparison(alignment, tracePath);

                        sequenceRow[p] = string.Join('-', alignedRows.Select(r => new string(r)));
                    }
                });
            }

            // Sum the per-query scores of every protein.
            int[] totalScores = new int[degenerateProteins.Length];
            for (int p = 0; p < degenerateProteins.Length; p++)
            {
                for (int q = 0; q < degenerateQueries.Count; q++)
                {
                    totalScores[p] += scores[q][p];
                }
            }

            // Sort ascending with the protein indexes carried along, then flip both to descending.
            int[] ranking = Enumerable.Range(0, degenerateProteins.Length).ToArray();
            Array.Sort(totalScores, ranking);
            Array.Reverse(totalScores);
            Array.Reverse(ranking);

            var targets = new List<Target>();
            int topCount = Math.Min(20, ranking.Length);

            for (int rank = 0; rank < topCount; rank++)
            {
                int proteinIndex = ranking[rank];

                // totalScores is in rank order after the sort; scores and sequences are still in protein order.
                var target = Target.SetTarget(totalScores[rank], degenerateProteins[proteinIndex].Accession);
                target.SingleScores = new int[degenerateQueries.Count];
                target.Matched_sequences = new string[degenerateQueries.Count];

                for (int q = 0; q < degenerateQueries.Count; q++)
                {
                    target.SingleScores[q] = scores[q][proteinIndex];
                    target.Matched_sequences[q] = matchedSequences[q][proteinIndex];
                }

                targets.Add(target);
            }

            // Write the candidates out.
            string outFileName = "target_matrix" + matrixId + ".tsv";
            string outFilePath = Path.Combine(Path.GetDirectoryName(query_path), outFileName);

            WriteTargets(targets, outFilePath);
        }