/// <summary>
/// Calculates the empirical P value of the wrapped alignment and returns it
/// as a string in scientific notation (format "E5").
/// </summary>
/// <remarks>
/// The observed score comes from <c>alignmentImpl.FindOptimalAlignment()</c>.
/// <c>permutationLimit</c> permutation tasks are started and awaited together,
/// their scores are flattened, and the result is
/// (number of permuted scores &gt;= observed score + 1) / (number of permuted scores + 1).
/// </remarks>
/// <returns>
/// The P value formatted with the invariant culture, so the decimal separator
/// does not vary with the machine's regional settings.
/// </returns>
public async Task<string> CalculatePValueAsync()
{
    // Actual score for the pair of sequences we tried to align.
    AlignmentImplementationResults results = alignmentImpl.FindOptimalAlignment();
    int observedScore = results.AlignmentScore;

    // Start one permutation/scoring task per requested permutation round.
    var scoringTasks = new List<Task<int[]>>();
    for (int i = 0; i < permutationLimit; i++)
    {
        scoringTasks.Add(this.PermuteStringAndScoreAsync());
    }

    int[][] scores = await Task.WhenAll(scoringTasks);
    List<int> flattenedScores = FlattenedArray(scores);

    // Count the permuted scores that are at least as good as the observed one;
    // only the count is needed, so no intermediate array is built.
    int equalOrBetterCount = flattenedScores.Count(s => s >= observedScore);

    // The +1 on both sides counts the observed alignment itself, which keeps
    // the estimate strictly above zero.
    double pValue = ((double)equalOrBetterCount + 1) / ((double)flattenedScores.Count + 1);

    return pValue.ToString("E5", System.Globalization.CultureInfo.InvariantCulture);
}
/// <summary>
/// Builds a new aligner of type <c>T</c> for the supplied pair of sequences,
/// reusing the score provider and gap-open penalty of the wrapped
/// implementation, and returns the resulting alignment score.
/// </summary>
/// <param name="sequenceToMatch">Sequence passed as the first constructor argument.</param>
/// <param name="targetSequence">Sequence passed as the second constructor argument.</param>
/// <returns>An already-completed task carrying the alignment score.</returns>
Task<int> FindScoreAsync(SequenceMetadata sequenceToMatch, SequenceMetadata targetSequence)
{
    // Constructor arguments, in the order the aligner's constructor is invoked with.
    object[] constructorArguments =
    {
        sequenceToMatch,
        targetSequence,
        this.alignmentImpl.ScoreProvider,
        this.alignmentImpl.GapOpenPenality
    };

    T aligner = (T)Activator.CreateInstance(typeof(T), constructorArguments);
    int score = aligner.FindOptimalAlignment().AlignmentScore;

    return Task.FromResult(score);
}
/// <summary>
/// Aligns two looked-up protein sequences with the BLOSUM62 score provider and
/// a gap-open penalty of 4, prints the alignment details, and computes an
/// empirical P value with a single permutation round.
/// </summary>
public async Task SmithWatermanP15172ToP17542WithBlosum62ScoringScheme()
{
    // NOTE(review): the method name mentions P15172 and P17542, but the
    // sequences fetched here are P10085 and P15172 - confirm the intended pair.
    SequenceMetadata item1 = await FastALookupCient.LookupByAccessionIdAsync("P10085");
    SequenceMetadata item2 = await FastALookupCient.LookupByAccessionIdAsync("P15172");

    var localAlignmentImpl = new SmithWatermanImplementation(
        sequenceTomatch: item1,
        targetSequence: item2,
        scoreProvider: new Blosum62ScoreProvider(),
        gapOpenPenality: 4);

    AlignmentImplementationResults result = localAlignmentImpl.FindOptimalAlignment();

    // Check for null before the result is dereferenced, so a missing result is
    // reported as an assertion failure rather than a NullReferenceException.
    Assert.IsNotNull(result);

    Console.WriteLine("--Optimal Alignment--");
    Console.WriteLine(result.TargetSequenceAlignment);
    Console.WriteLine(result.SearchSequenceAlignment);
    Console.WriteLine("--Optimal Score--");
    Console.WriteLine(result.AlignmentScore);

    var pvalueCalculator = new PValueCalculator<SmithWatermanImplementation>(
        alignmentImpl: localAlignmentImpl,
        permutationLimit: 1);

    string pValue = await pvalueCalculator.CalculatePValueAsync();
    Assert.IsFalse(String.IsNullOrEmpty(pValue));

    Console.WriteLine("--Calculated P value--");
    Console.WriteLine(pValue);
    Console.WriteLine(result.PrettyPrint());
    Console.WriteLine(result.PrettyPrintScoreMatrix());
}
/// <summary>
/// Finds the highest-scoring cell of the score matrix and traces back from it
/// to build the optimally aligned sequences.
/// </summary>
/// <returns>
/// A result object carrying the best score, the score provider, the score
/// matrix, both accession ids, and the alignment filled in by the traceback.
/// </returns>
AlignmentImplementationResults TracebackOptimalSequences()
{
    int rowCount = this.SequenceToMatch.Sequence.Length + 1;
    int columnCount = this.TargetSequence.Sequence.Length + 1;

    int bestScore = 0;
    int bestRow = 0;
    int bestColumn = 0;

    // Strict comparison: on ties the first cell in row-major order wins.
    for (int row = 0; row < rowCount; row++)
    {
        for (int column = 0; column < columnCount; column++)
        {
            int cellScore = substitutionMatrix[row, column];
            if (cellScore <= bestScore)
            {
                continue;
            }

            bestScore = cellScore;
            bestRow = row;
            bestColumn = column;
        }
    }

    // Both alignment strings start empty; the traceback prepends to them.
    var result = new AlignmentImplementationResults
    {
        AlignmentScore = bestScore,
        ScoreProvider = this.ScoreProvider,
        ScoreMatrix = substitutionMatrix,
        TargetSequenceAlignment = String.Empty,
        TargetAccessionId = this.TargetSequence.AccessionId,
        SearchAccessionId = this.SequenceToMatch.AccessionId,
        SearchSequenceAlignment = String.Empty
    };

    this.TracebackContributingNeighbour(bestScore, bestRow, bestColumn, result);

    return result;
}
/// <summary>
/// Aligns sequences Q10574 and P15172 with the simple score provider and a
/// gap-open penalty of 1, then verifies the alignment score and both aligned
/// strings against known expected values.
/// </summary>
public async Task SmithWatermanImplementationMainlineWithSimpleScoringScheme()
{
    SequenceMetadata item1 = await FastALookupCient.LookupByAccessionIdAsync("Q10574");
    SequenceMetadata item2 = await FastALookupCient.LookupByAccessionIdAsync("P15172");

    var localAlignmentImpl = new SmithWatermanImplementation(
        sequenceTomatch: item1,
        targetSequence: item2,
        scoreProvider: new SimpleScoreProvider(),
        gapOpenPenality: 1);

    AlignmentImplementationResults result = localAlignmentImpl.FindOptimalAlignment();

    Console.WriteLine("--Optimal Alignment--");
    Console.WriteLine(result.TargetSequenceAlignment);
    Console.WriteLine(result.SearchSequenceAlignment);
    Console.WriteLine("--Optimal Score--");
    Console.WriteLine(result.AlignmentScore);

    Assert.AreEqual(
        expected: 19,
        actual: result.AlignmentScore,
        message: "Mismatching alignment scores");

    Assert.AreEqual(
        expected: "VE-IL-RNA-IRY-I-E-GL-QA-LL-RDQD",
        actual: result.TargetSequenceAlignment,
        message: "Mismatching target alignment sequence");

    // The message names the search alignment so a failure points at the right string.
    Assert.AreEqual(
        expected: "-FE-TL-QMA-QKY-I-E-CL-SQ-IL-KQD",
        actual: result.SearchSequenceAlignment,
        message: "Mismatching search alignment sequence");
}
/// <summary>
/// Console entry point. Aligns a small hand-written pair of sequences, then
/// every ordered pair of distinct proteins from a fixed accession-id list,
/// writing each report to the console and to the output file. Empirical
/// P values are computed for the hand-written pair and for P15172 against
/// Q10574 and O95363. Finally prints the upper triangle of the pairwise
/// score matrix and waits for a key press.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    string outputLocation = @"c:\temp\output_jeeshn.txt";
    var proteinAccessionIds = new string[]
    {
        "P15172", "P17542", "P10085", "P16075", "P13904",
        "Q90477", "Q8IU24", "P22816", "Q10574", "O95363"
    };

    // Input data for a simple sequence
    var item1 = new SequenceMetadata { AccessionId = "X1", Sequence = "deadly" };
    var item2 = new SequenceMetadata { AccessionId = "X2", Sequence = "ddgearlyk" };

    using (var fileStream = File.Open(outputLocation, FileMode.Create))
    using (var streamWriter = new StreamWriter(fileStream))
    {
        // Run the local alignment for the simple pair.
        var localAlignmentImpl = new SmithWatermanImplementation(
            sequenceTomatch: item1,
            targetSequence: item2,
            scoreProvider: new Blosum62ScoreProvider(),
            gapOpenPenality: 4);
        AlignmentImplementationResults result = localAlignmentImpl.FindOptimalAlignment();

        WriteAlignmentReport(item1, item2, result, streamWriter);
        WriteToConsoleAndFile("Score Matrix", streamWriter);
        WriteToConsoleAndFile(result.PrettyPrintScoreMatrix(), streamWriter);
        WriteEmpiricalPValue(localAlignmentImpl, streamWriter);

        // Sized from the id list so the matrix cannot go out of step with it.
        int proteinCount = proteinAccessionIds.Length;
        int[,] scoreMatrix = new int[proteinCount, proteinCount];

        for (int i = 0; i < proteinCount; i++)
        {
            for (int j = 0; j < proteinCount; j++)
            {
                // no need to compare same sequences
                if (i == j)
                {
                    continue;
                }

                // Main is synchronous, so the lookups are blocked on here.
                // NOTE(review): each sequence is looked up again for every pair;
                // fetching once per id would be cheaper if instances can be shared - confirm.
                item1 = FastALookupCient.LookupByAccessionIdAsync(proteinAccessionIds[i]).Result;
                item2 = FastALookupCient.LookupByAccessionIdAsync(proteinAccessionIds[j]).Result;

                localAlignmentImpl = new SmithWatermanImplementation(
                    sequenceTomatch: item1,
                    targetSequence: item2,
                    scoreProvider: new Blosum62ScoreProvider(),
                    gapOpenPenality: 4);
                result = localAlignmentImpl.FindOptimalAlignment();

                WriteAlignmentReport(item1, item2, result, streamWriter);
                scoreMatrix[i, j] = result.AlignmentScore;

                bool needsPValue =
                    item1.AccessionId == "P15172" &&
                    (item2.AccessionId == "Q10574" || item2.AccessionId == "O95363");
                if (needsPValue)
                {
                    WriteEmpiricalPValue(localAlignmentImpl, streamWriter);
                }
            }
        }

        // Only the upper triangle (including the diagonal) is printed;
        // cells below the diagonal are shown as 0.
        var stringBuilder = new StringBuilder();
        for (int i = 0; i <= scoreMatrix.GetUpperBound(0); i++)
        {
            for (int j = 0; j <= scoreMatrix.GetUpperBound(1); j++)
            {
                string cell = j >= i ? scoreMatrix[i, j].ToString() : "0";
                stringBuilder.Append(cell.PadRight(6, ' '));
            }

            stringBuilder.AppendLine();
        }

        WriteToConsoleAndFile("Protein scoring matrix", streamWriter);
        WriteToConsoleAndFile(stringBuilder.ToString(), streamWriter);

        streamWriter.Flush();
        fileStream.Flush();
    }

    Console.ReadLine();
}

/// <summary>
/// Writes the "A vs B" header, the alignment score and the pretty-printed
/// alignment for one aligned pair.
/// </summary>
static void WriteAlignmentReport(
    SequenceMetadata searchSequence,
    SequenceMetadata targetSequence,
    AlignmentImplementationResults result,
    StreamWriter streamWriter)
{
    WriteToConsoleAndFile(
        String.Format("{0} vs {1}", searchSequence.AccessionId, targetSequence.AccessionId),
        streamWriter);
    WriteToConsoleAndFile("Alignment Score", streamWriter);
    WriteToConsoleAndFile(result.AlignmentScore.ToString(), streamWriter);
    WriteToConsoleAndFile("Alignment", streamWriter);
    WriteToConsoleAndFile(result.PrettyPrint(), streamWriter);
}

/// <summary>
/// Computes the empirical P value for the given alignment using 999
/// permutation rounds and writes it, followed by an empty line.
/// </summary>
static void WriteEmpiricalPValue(
    SmithWatermanImplementation alignmentImpl,
    StreamWriter streamWriter)
{
    const int permutationLimit = 999;

    var pvalueCalculator = new PValueCalculator<SmithWatermanImplementation>(
        alignmentImpl: alignmentImpl,
        permutationLimit: permutationLimit);

    // Blocking is deliberate: the caller is the synchronous entry point.
    string pValue = pvalueCalculator.CalculatePValueAsync().Result;

    WriteToConsoleAndFile("Empirical p-value", streamWriter);
    WriteToConsoleAndFile(pValue, streamWriter);
    WriteToConsoleAndFile(String.Empty, streamWriter);
}
/// <summary>
/// Walks back through the score matrix from the given cell, prepending one
/// aligned column to <paramref name="result"/> per step, until a cell with
/// value 0 is reached or no neighbour explains the current value.
/// </summary>
/// <remarks>
/// Implemented as a loop rather than recursion so that the stack depth does
/// not grow with the length of the alignment.
/// </remarks>
/// <param name="currentValue">Score of the cell to start from.</param>
/// <param name="currentRowIndex">Row of the starting cell (indexes the sequence to match, offset by one).</param>
/// <param name="currentColumnIndex">Column of the starting cell (indexes the target sequence, offset by one).</param>
/// <param name="result">Result object whose alignment strings and index lists are extended in place.</param>
void TracebackContributingNeighbour(
    int currentValue,
    int currentRowIndex,
    int currentColumnIndex,
    AlignmentImplementationResults result)
{
    // A zero cell ends the traceback.
    while (currentValue != 0)
    {
        int diagonalNeighbour = substitutionMatrix[currentRowIndex - 1, currentColumnIndex - 1];
        int horizontalNeighbour = substitutionMatrix[currentRowIndex, currentColumnIndex - 1];
        int verticalNeighbour = substitutionMatrix[currentRowIndex - 1, currentColumnIndex];

        char searchResidue = this.SequenceToMatch.Sequence[currentRowIndex - 1];
        char targetResidue = this.TargetSequence.Sequence[currentColumnIndex - 1];

        int diagonallyDerivedSubstitution =
            diagonalNeighbour +
            this.ScoreProvider.LookupPairwiseAlignmentScore(searchResidue, targetResidue);
        int verticallyDerivedSubstitution = verticalNeighbour - GapOpenPenality;
        int horizontallyDerivedSubstitution = horizontalNeighbour - GapOpenPenality;

        if (currentValue == diagonallyDerivedSubstitution)
        {
            // Match/mismatch: both sequences contribute a character.
            result.TargetSequenceAlignment = targetResidue + result.TargetSequenceAlignment;
            result.SearchSequenceAlignment = searchResidue + result.SearchSequenceAlignment;

            result.TargetAlignmentIndexes.Insert(0, new AlignmentItem
            {
                Index = currentColumnIndex - 1,
                AlignedCharacter = targetResidue
            });
            result.SearchAlignmentIndexes.Insert(0, new AlignmentItem
            {
                Index = currentRowIndex - 1,
                AlignedCharacter = searchResidue
            });

            currentValue = diagonalNeighbour;
            currentRowIndex = currentRowIndex - 1;
            currentColumnIndex = currentColumnIndex - 1;
        }
        else if (currentValue == verticallyDerivedSubstitution)
        {
            // Gap in the target sequence.
            result.TargetSequenceAlignment = "-" + result.TargetSequenceAlignment;
            result.SearchSequenceAlignment = searchResidue + result.SearchSequenceAlignment;

            // Copy the last index for a gap. It becomes easy to pretty print.
            // NOTE(review): reads element 0, so this assumes at least one item
            // was already recorded before a gap is met - confirm.
            result.TargetAlignmentIndexes.Insert(0, new AlignmentItem
            {
                Index = result.TargetAlignmentIndexes[0].Index,
                AlignedCharacter = '-'
            });
            result.SearchAlignmentIndexes.Insert(0, new AlignmentItem
            {
                Index = currentRowIndex - 1,
                AlignedCharacter = searchResidue
            });

            currentValue = verticalNeighbour;
            currentRowIndex = currentRowIndex - 1;
        }
        else if (currentValue == horizontallyDerivedSubstitution)
        {
            // Gap in the search sequence.
            result.TargetSequenceAlignment = targetResidue + result.TargetSequenceAlignment;
            result.SearchSequenceAlignment = "-" + result.SearchSequenceAlignment;

            result.TargetAlignmentIndexes.Insert(0, new AlignmentItem
            {
                Index = currentColumnIndex - 1,
                AlignedCharacter = targetResidue
            });

            // Copy the last index for a gap. It becomes easy to pretty print.
            // NOTE(review): same assumption as above about a prior item existing.
            result.SearchAlignmentIndexes.Insert(0, new AlignmentItem
            {
                Index = result.SearchAlignmentIndexes[0].Index,
                AlignedCharacter = '-'
            });

            currentValue = horizontalNeighbour;
            currentColumnIndex = currentColumnIndex - 1;
        }
        else
        {
            // No neighbour accounts for the current value; stop here.
            return;
        }
    }
}