/// <summary>
/// Finds splice alignments between <paramref name="n"/> and <paramref name="c"/> via
/// <c>GetAlignmentsPreservingFullSsBlocks</c> and then discards alignments that would trim away too much of either chain.
/// In every alignment, Range1 indexes into <paramref name="n"/> and Range2 indexes into <paramref name="c"/>.
/// </summary>
/// <param name="n">Chain whose residues are indexed by Range1 of each alignment.</param>
/// <param name="nKeepRepeats">When true, and an internal repeat longer than 15 residues is detected in <paramref name="n"/>,
/// alignments ending more than 1.25 repeat lengths before the end of <paramref name="n"/> are removed.</param>
/// <param name="c">Chain whose residues are indexed by Range2 of each alignment.</param>
/// <param name="cKeepRepeats">When true, and an internal repeat longer than 15 residues is detected in <paramref name="c"/>,
/// alignments starting more than 1.25 repeat lengths after the start of <paramref name="c"/> are removed.</param>
/// <param name="allowedTypes">Secondary structure types forwarded to the alignment search.</param>
/// <param name="minAlignmentLength">Minimum alignment length forwarded to the alignment search; also the minimum block length used for the block-count filters.</param>
/// <param name="maxRmsd">RMSD threshold forwarded to the alignment search.</param>
/// <returns>The alignments that survive every filter.</returns>
public static List<SequenceAlignment> GetFilteredSequenceAlignments(IChain n, bool nKeepRepeats, IChain c, bool cKeepRepeats, SS allowedTypes, int minAlignmentLength = DefaultMinAlignmentLength, float maxRmsd = DefaultRmsdThreshold)
{
    List<SequenceAlignment> alignments = GetAlignmentsPreservingFullSsBlocks(n, c, allowedTypes, minAlignmentLength, maxRmsd);
    int maxThirdHelixShortening = DefaultMaxThirdHelixTrimming;

    // Find all alignments and remove those that would:
    // -- remove more than one repeat of a repeat protein
    if (nKeepRepeats)
    {
        int nRepeatLength;
        if (Sequence.TryGetInternalRepeatLength(n, out nRepeatLength) && nRepeatLength > 15)
        {
            // Everything after Range1.End is dropped from n.
            alignments.RemoveAll(a => a.Range1.End < n.Count - 1.25 * nRepeatLength);
        }
    }

    if (cKeepRepeats)
    {
        int cRepeatLength;
        if (Sequence.TryGetInternalRepeatLength(c, out cRepeatLength) && cRepeatLength > 15)
        {
            // Everything before Range2.Start is dropped from c, so the test must use the c-side range
            // (Range2) and reject alignments that start too far into the chain.
            alignments.RemoveAll(a => a.Range2.Start > 1.25 * cRepeatLength);
        }
    }

    // -- leave two or fewer secondary structure elements in either chain. An exception is made if there are only two secondary structure
    //    elements total to begin with, which is common for helical bundles
    // -- shorten the third helix of a 3-helix bundle too much, because that would also probably destabilize the oligomer
    List<SSBlock> nBlocks = SecondaryStructure.GetPhiPsiSSBlocksOfType(n, SS.Helix | SS.Extended, minAlignmentLength);
    List<SSBlock> cBlocks = SecondaryStructure.GetPhiPsiSSBlocksOfType(c, SS.Helix | SS.Extended, minAlignmentLength);

    if (nBlocks.Count > 2)
    {
        alignments.RemoveAll(a => a.Range1.Start < nBlocks[1].End);
        alignments.RemoveAll(a => a.Range1.End < nBlocks[2].End - maxThirdHelixShortening);
    }

    if (cBlocks.Count > 2)
    {
        alignments.RemoveAll(a => a.Range2.End > cBlocks[cBlocks.Count - 2].Start);
        alignments.RemoveAll(a => a.Range2.Start > cBlocks[cBlocks.Count - 3].Start + maxThirdHelixShortening);
    }

    return alignments;
}
/// <summary>
/// Find splicing alignment ranges for two peptides that would retain a full secondary structure block for at least one of the two peptides. This prevents
/// short splice overlaps that remove some of the secondary structure on both sides of the alignment. Basically, the alignment must result in either:
/// 1) one secondary structure block being fully included in the other
/// 2) the N-terminal peptide being spliced has the end of its block fully in the resultant alignment, thus preserving the entire N-side block
/// 3) the C-terminal peptide being spliced has the start of its block fully in the resultant alignment, thus preserving the entire C-side block
/// </summary>
/// <param name="n">The N-terminal peptide; its residue range becomes the first range of each returned alignment.</param>
/// <param name="c">The C-terminal peptide; its residue range becomes the second range of each returned alignment.</param>
/// <param name="allowedTypes">Secondary structure types passed to <c>SecondaryStructure.GetPhiPsiSSBlocksOfType</c> when collecting candidate blocks.</param>
/// <param name="minAlignmentLength">Minimum block length and minimum overlap length, in residues.</param>
/// <param name="maxRmsd">Alignments for which <c>Rmsd.GetRmsdNCAC</c> returns more than this value are discarded.</param>
/// <returns>All alignments that pass both the block-preservation filter and the RMSD threshold.</returns>
public static List<SequenceAlignment> GetAlignmentsPreservingFullSsBlocks(IChain n, IChain c, SS allowedTypes, int minAlignmentLength = DefaultMinAlignmentLength, float maxRmsd = DefaultRmsdThreshold)
{
    // TODO!!!: Debug why calling this function with peptides A,B vs B,A returns different numbers of alignments.
    // NOTE(review): the block-preservation filter below is direction-dependent (it tests the end of the n block and
    // the start of the c block), which may account for part of that difference - confirm.
    List<SequenceAlignment> alignments = new List<SequenceAlignment>();
    List<SSBlock> nBlocks = SecondaryStructure.GetPhiPsiSSBlocksOfType(n, allowedTypes, minAlignmentLength).Where(block => block.Length >= minAlignmentLength).ToList();
    List<SSBlock> cBlocks = SecondaryStructure.GetPhiPsiSSBlocksOfType(c, allowedTypes, minAlignmentLength).Where(block => block.Length >= minAlignmentLength).ToList();

    foreach (SSBlock nBlock in nBlocks)
    {
        foreach (SSBlock cBlock in cBlocks)
        {
            // nAlignmentOfBlockC is the index in n that the first residue of cBlock is laid over. It slides from the
            // position where only the last minAlignmentLength residues of nBlock overlap the start of cBlock, down to
            // the position where only the first minAlignmentLength residues of nBlock overlap the end of cBlock.
            // The bound is inclusive (>=): at the final offset the overlap is exactly minAlignmentLength, the same
            // length that is accepted at the first offset.
            for (int nAlignmentOfBlockC = nBlock.End - minAlignmentLength + 1; nAlignmentOfBlockC + cBlock.Length >= nBlock.Start + minAlignmentLength; nAlignmentOfBlockC--)
            {
                // Figure out the start and ends of the overlap region
                int nStart = Math.Max(nBlock.Start, nAlignmentOfBlockC);
                int nEnd = Math.Min(nBlock.End, nAlignmentOfBlockC + cBlock.Length - 1);
                int alignmentLength = nEnd - nStart + 1;
                int cStart = Math.Max(cBlock.Start, cBlock.Start + (nStart - nAlignmentOfBlockC));
                int cEnd = cStart + alignmentLength - 1;

                Debug.Assert(nBlock.Start <= nStart && nEnd <= nBlock.End);
                Debug.Assert(cBlock.Start <= cStart && cEnd <= cBlock.End);
                Debug.Assert(nEnd - nStart == cEnd - cStart);

                bool cFullyIncluded = (cBlock.Start == cStart && cBlock.End == cEnd);
                bool nFullyIncluded = (nBlock.Start == nStart && nBlock.End == nEnd);

                // Keep only overlaps that contain a whole block, the end of the n block, or the start of the c block.
                if (!nFullyIncluded && !cFullyIncluded && cStart != cBlock.Start && nEnd != nBlock.End)
                {
                    continue;
                }

                if (Rmsd.GetRmsdNCAC(n, nStart, nEnd, c, cStart, cEnd) > maxRmsd)
                {
                    continue;
                }

                alignments.Add(new SequenceAlignment(nStart, nEnd, cStart, cEnd));
            }
        }
    }

    return alignments;
}
/// <summary>
/// Finds every overlap of at least <paramref name="minAlignmentLength"/> residues between a secondary structure block of
/// <paramref name="chain1"/> and a secondary structure block of <paramref name="chain2"/> whose superposition RMSD does not
/// exceed <paramref name="maxRmsd"/>. Each result carries the centroids of both aligned ranges and the transform returned by
/// <c>Rmsd.GetRmsdAndTransform</c> together with its inverse.
/// Unlike <c>GetAlignmentsPreservingFullSsBlocks</c>, no block-preservation filter is applied here.
/// </summary>
/// <param name="chain1">Chain whose residue range becomes the first range of each alignment.</param>
/// <param name="chain2">Chain whose residue range becomes the second range of each alignment.</param>
/// <param name="allowedTypes">Secondary structure types passed to <c>SecondaryStructure.GetPhiPsiSSBlocksOfType</c> when collecting candidate blocks.</param>
/// <param name="minAlignmentLength">Minimum block length and minimum overlap length, in residues.</param>
/// <param name="maxRmsd">Overlaps whose RMSD is greater than this value are skipped.</param>
/// <returns>The alignments that pass the RMSD threshold, with centroid and transform data populated.</returns>
public static List<TransformSequenceAlignment> GetTransformAlignments(IChain chain1, IChain chain2, SS allowedTypes, int minAlignmentLength = DefaultMinAlignmentLength, float maxRmsd = DefaultRmsdThreshold)
{
    List<TransformSequenceAlignment> alignments = new List<TransformSequenceAlignment>();
    List<SSBlock> nBlocks = SecondaryStructure.GetPhiPsiSSBlocksOfType(chain1, allowedTypes, minAlignmentLength).Where(block => block.Length >= minAlignmentLength).ToList();
    List<SSBlock> cBlocks = SecondaryStructure.GetPhiPsiSSBlocksOfType(chain2, allowedTypes, minAlignmentLength).Where(block => block.Length >= minAlignmentLength).ToList();

    // Previously, calling this function with peptides A,B vs B,A returned different numbers of alignments. The original intent was
    // for the first chain passed in to be at the N-terminus and the second at the C-terminus (post-fusion), so post-fusion lengths
    // were being calculated here and culled differently based on order.
    foreach (SSBlock nBlock in nBlocks)
    {
        foreach (SSBlock cBlock in cBlocks)
        {
            // nAlignmentOfBlockC is the index in chain1 that the first residue of cBlock is laid over; it slides across
            // every offset at which the two blocks overlap by at least minAlignmentLength residues.
            for (int nAlignmentOfBlockC = nBlock.End - minAlignmentLength + 1; nAlignmentOfBlockC + cBlock.Length >= nBlock.Start + minAlignmentLength; nAlignmentOfBlockC--)
            {
                // Figure out the start and ends of the overlap region
                int nStart = Math.Max(nBlock.Start, nAlignmentOfBlockC);
                int nEnd = Math.Min(nBlock.End, nAlignmentOfBlockC + cBlock.Length - 1);
                int alignmentLength = nEnd - nStart + 1;
                int cStart = Math.Max(cBlock.Start, cBlock.Start + (nStart - nAlignmentOfBlockC));
                int cEnd = cStart + alignmentLength - 1;

                Debug.Assert(nBlock.Start <= nStart && nEnd <= nBlock.End);
                Debug.Assert(cBlock.Start <= cStart && cEnd <= cBlock.End);
                Debug.Assert(nEnd - nStart == cEnd - cStart);
                Trace.Assert(nEnd - nStart + 1 >= minAlignmentLength);

                float rmsd;
                Matrix matrix = Rmsd.GetRmsdAndTransform(chain1, nStart, nEnd, chain2, cStart, cEnd, out rmsd);
                bool fail = rmsd > maxRmsd;

#if DEBUG && false
                // Disabled diagnostic: the pass/fail decision should not depend on argument order.
                float rmsd2 = float.NaN;
                Matrix matrix2 = Rmsd.GetRmsdAndTransform(chain2, cStart, cEnd, chain1, nStart, nEnd, out rmsd2);
                bool fail2 = rmsd2 > maxRmsd;
                Trace.Assert(fail == fail2);
#endif

                if (fail)
                {
                    continue;
                }

                TransformSequenceAlignment alignment = new TransformSequenceAlignment(nStart, nEnd, cStart, cEnd);
                alignment.Centroid1 = Geometry.GetCenterNCAC(chain1[nStart, nEnd]);
                alignment.Centroid2 = Geometry.GetCenterNCAC(chain2[cStart, cEnd]);
                alignment.Align1 = matrix;
                alignment.Align2 = Matrix.Invert(matrix);
                alignments.Add(alignment);
            }
        }
    }

    return alignments;
}
/// <summary>
/// Builds a single fused chain from consecutive peptides, splicing <c>peptides[i]</c> to <c>peptides[i + 1]</c> at <c>alignments[i]</c>.
/// Every residue in the result is a copy whose NodeTransform is set to the source residue's TotalTransform.
/// Within each alignment region one residue is chosen per position, in this order of precedence:
/// 1) the residue from the first peptide if it is in <paramref name="immutableAas"/>
/// 2) the residue from the second peptide if it is in <paramref name="immutableAas"/>
/// 3) the residue from the first peptide if the second peptide offers a proline and the first peptide is helical at that position and the four before it
/// 4) whichever residue has fewer sidechain-sidechain clashes with the retained part of the other peptide (ties go to the first peptide)
/// </summary>
/// <param name="peptides">Chains to fuse, in order.</param>
/// <param name="alignments">Splice alignments; entry i is indexed for every i below <c>peptides.Length - 1</c>. Range1 indexes into <c>peptides[i]</c> and Range2 into <c>peptides[i + 1]</c>.</param>
/// <param name="immutableAas">Residues that are kept whenever they appear in an alignment region.</param>
/// <returns>A new chain containing the fused residues.</returns>
public static IChain GetChain(IChain[] peptides, SequenceAlignment[] alignments, Selection immutableAas)
{
    IChain fusion = new Chain();

    // Do all pairwise analysis
    for (int i = 0; i < peptides.Length - 1; i++)
    {
        SequenceAlignment alignment = alignments[i];

        // Determine the ranges outside of the splice
        int start1 = i == 0 ? 0 : alignments[i - 1].Range2.End + 1;
        int end1 = alignment.Range1.Start - 1;
        int start2 = alignment.Range2.End + 1;
        int end2 = i < alignments.Length - 1 ? alignments[i + 1].Range1.Start - 1 : peptides[i + 1].Count - 1;

        // Add the non-overlapping region of the first peptide
        if (start1 <= end1)
        {
            // Only residue 0 of the very first peptide is the N-terminus; flag it per residue, the same way the
            // alignment region below does. Assumes the range indexer enumerates residues in ascending index order.
            int residueIndex = start1;
            foreach (Aa aa in peptides[i][start1, end1])
            {
                bool isNTerminus = i == 0 && residueIndex == 0;
                Aa copy = new Aa(aa, isNTerminus, false);
                copy.NodeTransform = aa.TotalTransform;
                fusion.Add(copy);
                residueIndex++;
            }
        }

        // Add the alignment region, selecting either from the first or second peptide so as to minimize clashes with the sidechains that
        // are for sure being kept on either side
        Debug.Assert(alignment.Range1.Length == alignment.Range2.Length);
        for (int alignmentOffset = 0; alignmentOffset < alignment.Range1.Length; alignmentOffset++)
        {
            int index1 = alignment.Range1.Start + alignmentOffset;
            int index2 = alignment.Range2.Start + alignmentOffset;
            IAa option1 = peptides[i][index1];
            IAa option2 = peptides[i + 1][index2];
            bool nTerminus = i == 0 && index1 == 0;
            bool cTerminus = (i == peptides.Length - 2) && (index2 == peptides[i + 1].Count - 1);

            IAa chosen;
            if (immutableAas.Aas.Contains(option1))
            {
                chosen = option1;
            }
            else if (immutableAas.Aas.Contains(option2))
            {
                chosen = option2;
            }
            else
            {
                // Prefer the first peptide's residue over a proline from the second peptide when the first peptide
                // is helical at this position and the four positions before it.
                bool prolineInHelix = false;
                if (option2.Letter == 'P' && index1 >= 4)
                {
                    SS[] ss1 = SecondaryStructure.GetPhiPsiSS(peptides[i], 5);
                    prolineInHelix = (ss1[index1] | ss1[index1 - 1] | ss1[index1 - 2] | ss1[index1 - 3] | ss1[index1 - 4]) == SS.Helix;
                }

                if (prolineInHelix)
                {
                    chosen = option1;
                }
                else
                {
                    // Otherwise, select the residue with fewer clashes against the retained part of the other peptide
                    int clashCount1 = end2 >= start2
                        ? peptides[i + 1][start2, end2].Count(other => Clash.AnyContact(other, option1, Clash.ContactType.SidechainSidechainClash))
                        : 0;
                    int clashCount2 = end1 >= start1
                        ? peptides[i][start1, end1].Count(other => Clash.AnyContact(other, option2, Clash.ContactType.SidechainSidechainClash))
                        : 0;
                    chosen = clashCount1 <= clashCount2 ? option1 : option2;
                }
            }

            Aa selected = new Aa(chosen, nTerminus, cTerminus);
            selected.NodeTransform = chosen.TotalTransform;
            fusion.Add(selected);
        }

        // Add the non-overlapping region of the last peptide
        if (i == peptides.Length - 2 && start2 <= end2)
        {
            foreach (Aa aa in peptides[i + 1][start2, end2])
            {
                Aa copy = new Aa(aa, false, aa.IsCTerminus);
                copy.NodeTransform = aa.TotalTransform;
                fusion.Add(copy);
            }
        }
    }

    return fusion;
}