/// <summary>
/// Dumps the cost matrix of <paramref name="MA"/> to a tab-separated text file.
/// The first line is a header: two leading tabs followed by every element of
/// <c>MA.Protein</c>. Each following line is one matrix row: the first row is
/// prefixed by a single tab, every later row by the matching element of
/// <c>MA.Query</c>, and each cell contributes its <c>MaxCost</c> plus a tab.
/// </summary>
/// <param name="MA">Alignment whose matrix is written.</param>
/// <param name="filePath">Path of the file to create or overwrite.</param>
public static void PrintMatrix(MatrixAlignment MA, string filePath)
{
    using (StreamWriter writer = new StreamWriter(filePath))
    {
        // Header line: protein elements across the top, offset by two columns.
        string header = "\t\t";
        foreach (var residue in MA.Protein)
        {
            header = header + residue + "\t";
        }
        writer.WriteLine(header);

        // Row 0 has no query element in front of it; row k (k >= 1) is
        // labelled with Query[k - 1].
        int rowNumber = 0;
        foreach (var row in MA.matrix)
        {
            string text = rowNumber == 0
                ? "\t"
                : MA.Query[rowNumber - 1] + "\t";

            foreach (var cell in row)
            {
                text = text + cell.MaxCost + "\t";
            }

            writer.WriteLine(text);
            rowNumber++;
        }
    }
}
/// <summary>
/// Converts a traceback path into two aligned character rows.
/// Row 0 holds the query side and row 1 the protein side; a gap is written as '-'.
/// </summary>
/// <param name="MA">Alignment providing the <c>Query</c> and <c>Protein</c> sequences.</param>
/// <param name="path">
/// Steps as (source j, source i, direction). The stack is enumerated from its
/// top, i.e. starting with the most recently pushed step.
/// Direction: 0 = up-left (match/mismatch), 1 = left, -1 = up.
/// </param>
/// <returns>
/// A jagged array of two rows, each <c>path.Count</c> long. Steps with an
/// unrecognised direction leave both positions at the default char value.
/// </returns>
public static char[][] SequenceComparison(MatrixAlignment MA, Stack<Tuple<int, int, int>> path)
{
    char[][] seqs = new char[2][];
    seqs[0] = new char[path.Count];
    seqs[1] = new char[path.Count];

    int ind = 0;
    foreach (var pos in path)
    {
        switch (pos.Item3)
        {
            case 0:
                // Diagonal step from (j-1, i-1): Query[j-1] is paired with Protein[i-1].
                seqs[0][ind] = MA.Query[pos.Item1];
                seqs[1][ind] = MA.Protein[pos.Item2];
                break;

            case 1:
                // Left step from (j, i-1): only Protein[i-1] is consumed, so the
                // gap belongs in the query row. (Previously this emitted Query[j],
                // which is the wrong residue and can index past the end of Query.)
                seqs[0][ind] = '-';
                seqs[1][ind] = MA.Protein[pos.Item2];
                break;

            case -1:
                // Up step from (j-1, i): only Query[j-1] is consumed, so the gap
                // belongs in the protein row. (Previously this emitted Protein[i],
                // which can index past the end of Protein.)
                seqs[0][ind] = MA.Query[pos.Item1];
                seqs[1][ind] = '-';
                break;
        }
        ind++;
    }

    return seqs;
}
//Needleman–Wunsch algorithm with restriction
/// <summary>
/// Fills the matrix using diagonal (up-left) moves only, so no gaps are ever
/// introduced, then picks the best score in the last query row.
/// </summary>
/// <param name="MA">Alignment whose matrix, <c>MaxScore</c> and <c>MaxScorePos</c> are updated.</param>
/// <param name="gap">Unused by this variant; kept so the signature matches <c>AlignMatrix</c>.</param>
/// <param name="scoreTable">Substitution scores indexed as [query residue][protein residue].</param>
/// <remarks>
/// <c>MaxScore</c> and <c>MaxScorePos</c> are only assigned when
/// <c>ProteinLength &gt;= QueryLength</c>; otherwise they are left untouched.
/// </remarks>
public static void AlignMatrix_UpLeft(MatrixAlignment MA, int gap, Dictionary<char, Dictionary<char, int>> scoreTable)
{
    // Every cell is reached from its up-left neighbour only.
    for (int j = 1; j < MA.QueryLength + 1; j++)
    {
        for (int i = 1; i < MA.ProteinLength + 1; i++)
        {
            var lu = MA.matrix[j - 1][i - 1].MaxCost + scoreTable[MA.Query[j - 1]][MA.Protein[i - 1]];
            MA.matrix[j][i].MaxCost = lu;
            MA.matrix[j][i].Source = new Tuple<int, int, int>(j - 1, i - 1, 0);
        }
    }

    if (MA.ProteinLength >= MA.QueryLength)
    {
        // Start from a true "minus infinity". The previous sentinel of -1084
        // skipped alignments whose scores were all at or below that value,
        // leaving MaxScore/MaxScorePos unset for the worst matches.
        int bestScore = int.MinValue;
        int bestColumn = -1;

        // Only columns where the whole query fits (i >= QueryLength) are candidates.
        for (int i = MA.QueryLength; i < MA.ProteinLength + 1; i++)
        {
            int candidate = MA.matrix[MA.QueryLength][i].MaxCost;
            if (candidate > bestScore)
            {
                bestScore = candidate;
                bestColumn = i;
            }
        }

        if (bestColumn >= 0)
        {
            MA.MaxScore = bestScore;
            MA.MaxScorePos = new Tuple<int, int>(MA.QueryLength, bestColumn);
        }
    }
}
//Tuple<int, int, int> (pos j, pos i, from 0:up-left, 1:left, -1:up)
/// <summary>
/// Walks the <c>Source</c> links back from <c>MA.MaxScorePos</c> and collects
/// the visited source positions on a stack.
/// </summary>
/// <param name="MA">Alignment whose matrix has already been filled.</param>
/// <returns>
/// A stack of (source j, source i, direction) steps; the last step pushed is
/// the one furthest from <c>MaxScorePos</c>. The stack is empty when
/// <c>MA.MaxScore</c> is not positive.
/// </returns>
public static Stack<Tuple<int, int, int>> Traceback(MatrixAlignment MA)
{
    var steps = new Stack<Tuple<int, int, int>>();

    // Nothing to trace when no positive-scoring alignment was recorded.
    if (MA.MaxScore <= 0)
    {
        return steps;
    }

    int row = MA.MaxScorePos.Item1;
    int col = MA.MaxScorePos.Item2;

    // Stop at the matrix border or at the first cell with a non-positive cost.
    while (row > 0 && col > 0 && MA.matrix[row][col].MaxCost > 0)
    {
        var origin = MA.matrix[row][col].Source;
        steps.Push(Tuple.Create(origin.Item1, origin.Item2, origin.Item3));
        row = origin.Item1;
        col = origin.Item2;
    }

    return steps;
}
//Smith–Waterman algorithm
/// <summary>
/// Fills the matrix with local-alignment scores: each cell takes the best of
/// the up-left, left and up candidates, each clamped at zero, and records
/// which neighbour it came from.
/// </summary>
/// <param name="MA">Alignment whose matrix, <c>MaxScore</c> and <c>MaxScorePos</c> are updated.</param>
/// <param name="gap">Penalty subtracted for a left or up (gap) move.</param>
/// <param name="scoreTable">Substitution scores indexed as [query residue][protein residue].</param>
/// <remarks>
/// <c>MaxScore</c>/<c>MaxScorePos</c> are updated only when the up-left
/// candidate wins the cell; on equal scores the later cell replaces the earlier one.
/// </remarks>
public static void AlignMatrix(MatrixAlignment MA, int gap, Dictionary<char, Dictionary<char, int>> scoreTable)
{
    var _maxScore = 0;
    for (int j = 1; j < MA.QueryLength + 1; j++)
    {
        for (int i = 1; i < MA.ProteinLength + 1; i++)
        {
            // Candidate scores, each floored at zero.
            int lu = Math.Max(0, MA.matrix[j - 1][i - 1].MaxCost + scoreTable[MA.Query[j - 1]][MA.Protein[i - 1]]);
            int l = Math.Max(0, MA.matrix[j][i - 1].MaxCost - gap);
            // The gap penalty must be applied to the value itself, not only to
            // the positivity test; previously an "up" move cost nothing.
            int u = Math.Max(0, MA.matrix[j - 1][i].MaxCost - gap);

            // if same score, tend to choose lu > l > u.
            if (lu >= l && lu >= u)
            {
                MA.matrix[j][i].MaxCost = lu;
                MA.matrix[j][i].Source = new Tuple<int, int, int>(j - 1, i - 1, 0);
                if (lu >= _maxScore)
                {
                    _maxScore = lu;
                    MA.MaxScore = lu;
                    MA.MaxScorePos = new Tuple<int, int>(j, i);
                }
            }
            else if (l > lu && l >= u)
            {
                MA.matrix[j][i].MaxCost = l;
                MA.matrix[j][i].Source = new Tuple<int, int, int>(j, i - 1, 1);
            }
            else if (u > lu && u > l)
            {
                MA.matrix[j][i].MaxCost = u;
                MA.matrix[j][i].Source = new Tuple<int, int, int>(j - 1, i, -1);
            }
        }
    }
}
/// <summary>
/// Aligns every degenerate query against every degenerate protein, sums the
/// per-query scores for each protein, and writes the 20 highest-scoring
/// proteins to "target_matrix{matrixId}.tsv" in the directory of
/// <paramref name="query_path"/>.
/// </summary>
/// <param name="db_path">Path handed to <c>DataBaseManipulation.LoadProteinDb</c>.</param>
/// <param name="query_path">Path handed to <c>DataBaseManipulation.LoadQueryMap</c>; its directory receives the output file.</param>
/// <param name="matrixId">
/// 0 runs <c>MatrixAlignment.AlignMatrix</c>, 1 runs
/// <c>MatrixAlignment.AlignMatrix_UpLeft</c>; for any other value no alignment
/// routine is called before the score is read.
/// </param>
public void Run(string db_path, string query_path, int matrixId)
{
    var queryMap = DataBaseManipulation.LoadQueryMap(query_path);
    var dQueries = DataBaseManipulation.DegenerateQueryMap(queryMap);
    var proteinDb = DataBaseManipulation.LoadProteinDb(db_path, true, DecoyType.None, Parameters.MaxThreadsToUse);
    var dProteins = DataBaseManipulation.DegenerateProteinDa(proteinDb, Parameters.MaxThreadsToUse);

    const int gap = 32767;
    int[][] scores = new int[dQueries.Count][];
    string[][] matchedSequences = new string[dQueries.Count][];

    for (int q = 0; q < dQueries.Count; q++)
    {
        int[] rowScores = new int[dProteins.Length];
        string[] rowMatches = new string[dProteins.Length];
        scores[q] = rowScores;
        matchedSequences[q] = rowMatches;

        var query = dQueries[q];
        int[] workerIds = Enumerable.Range(0, Parameters.MaxThreadsToUse).ToArray();

        //TODO: the MatrixAlignment can be optimized. Basically we could create one large MatrixAlignment and clear its content each time to reduce memory usage.
        Parallel.ForEach(workerIds, workerId =>
        {
            // Each worker handles the proteins workerId, workerId + N, workerId + 2N, ...
            for (int p = workerId; p < dProteins.Length; p += Parameters.MaxThreadsToUse)
            {
                MatrixAlignment ma = new MatrixAlignment(dProteins[p].BaseSequence, query);

                switch (matrixId)
                {
                    case 0:
                        MatrixAlignment.AlignMatrix(ma, gap, MatrixAlignment.ScoreTable);
                        break;
                    case 1:
                        MatrixAlignment.AlignMatrix_UpLeft(ma, gap, MatrixAlignment.ScoreTable);
                        break;
                }

                rowScores[p] = ma.MaxScore;

                var path = MatrixAlignment.Traceback(ma);
                var seqCom = MatrixAlignment.SequenceComparison(ma, path);
                rowMatches[p] = string.Join('-', seqCom.Select(chars => new string(chars)));
            }
        });
    }

    // Total score per protein across all queries.
    int[] addup_score = new int[dProteins.Length];
    for (int p = 0; p < dProteins.Length; p++)
    {
        for (int q = 0; q < dQueries.Count; q++)
        {
            addup_score[p] += scores[q][p];
        }
    }

    // Sort ascending by total score (carrying the protein indexes along),
    // then flip both arrays so the best protein comes first.
    int[] indexes = Enumerable.Range(0, dProteins.Length).ToArray();
    Array.Sort(addup_score, indexes);
    Array.Reverse(addup_score);
    Array.Reverse(indexes);

    List<Target> targets = new List<Target>();
    int topCount = Math.Min(20, indexes.Length);
    for (int rank = 0; rank < topCount; rank++)
    {
        int proteinIndex = indexes[rank];

        // addup_score is already in rank order; scores/matchedSequences are
        // still addressed by the original protein index.
        var target = Target.SetTarget(addup_score[rank], dProteins[proteinIndex].Accession);
        target.SingleScores = new int[dQueries.Count];
        target.Matched_sequences = new string[dQueries.Count];
        for (int q = 0; q < dQueries.Count; q++)
        {
            target.SingleScores[q] = scores[q][proteinIndex];
            target.Matched_sequences[q] = matchedSequences[q][proteinIndex];
        }
        targets.Add(target);
    }

    //Write candidates out.
    var fileName = $"target_matrix{matrixId}.tsv";
    string outFilePath = Path.Combine(Path.GetDirectoryName(query_path), fileName);
    WriteTargets(targets, outFilePath);
}