/// <summary>
/// Collects the splice alignments produced by <c>GetAlignmentsPreservingFullSsBlocks</c> for the pair (n, c) and then discards
/// those that would trim too much off either chain. In the returned alignments Range1 is compared against positions in
/// <paramref name="n"/> and Range2 against positions in <paramref name="c"/>. An alignment is removed when it would:
/// -- remove more than 1.25 repeat lengths from a repeat protein (only when the matching keep-repeats flag is set),
/// -- leave two or fewer secondary structure elements in a chain that has more than two Helix/Extended blocks, or
/// -- shorten the third helix by more than DefaultMaxThirdHelixTrimming residues (same more-than-two-blocks condition).
/// </summary>
/// <param name="n">Chain whose alignment range is Range1; filters are applied relative to its end (n.Count).</param>
/// <param name="nKeepRepeats">When true, apply the repeat-preservation filter to <paramref name="n"/>.</param>
/// <param name="c">Chain whose alignment range is Range2; filters are applied relative to its start.</param>
/// <param name="cKeepRepeats">When true, apply the repeat-preservation filter to <paramref name="c"/>.</param>
/// <param name="allowedTypes">Secondary structure types passed through to the alignment search.</param>
/// <param name="minAlignmentLength">Minimum alignment length; also used as the block length argument when finding blocks for filtering.</param>
/// <param name="maxRmsd">RMSD cutoff passed through to the alignment search.</param>
/// <returns>The alignments that survive every filter.</returns>
public static List<SequenceAlignment> GetFilteredSequenceAlignments(IChain n, bool nKeepRepeats, IChain c, bool cKeepRepeats, SS allowedTypes, int minAlignmentLength = DefaultMinAlignmentLength, float maxRmsd = DefaultRmsdThreshold)
{
    // Repeats must be strictly longer than this for the repeat filter to apply.
    const int repeatLengthThreshold = 15;
    // Number of repeat lengths that a splice is allowed to remove from a repeat protein.
    const double maxRepeatLengthsRemoved = 1.25;

    List<SequenceAlignment> alignments = GetAlignmentsPreservingFullSsBlocks(n, c, allowedTypes, minAlignmentLength, maxRmsd);
    int maxThirdHelixShortening = DefaultMaxThirdHelixTrimming;

    // Find all alignments and remove those that would:
    // -- remove more than one repeat of a repeat protein
    if (nKeepRepeats)
    {
        int nRepeatLength;
        if (Sequence.TryGetInternalRepeatLength(n, out nRepeatLength) && nRepeatLength > repeatLengthThreshold)
        {
            // Everything in n after the alignment is lost, so the alignment may not end too early.
            double earliestAllowedEnd = n.Count - maxRepeatLengthsRemoved * nRepeatLength;
            alignments.RemoveAll(a => a.Range1.End < earliestAllowedEnd);
        }
    }

    if (cKeepRepeats)
    {
        int cRepeatLength;
        if (Sequence.TryGetInternalRepeatLength(c, out cRepeatLength) && cRepeatLength > repeatLengthThreshold)
        {
            // Everything in c before the alignment is lost, so the alignment may not start too late.
            // This previously tested Range1.Start (an index into n) with the inequality reversed; the C-side
            // check must mirror the N-side one using the c range, like the block filters below do.
            double latestAllowedStart = maxRepeatLengthsRemoved * cRepeatLength;
            alignments.RemoveAll(a => a.Range2.Start > latestAllowedStart);
        }
    }

    // -- leave two or fewer secondary structure elements in either chain. An exception is made if there are only two secondary structure
    //    elements total to begin with, which is common for helical bundles
    // -- shorten the third helix of a 3-helix bundle too much, because that would also probably destabilize the oligomer
    List<SSBlock> nBlocks = SecondaryStructure.GetPhiPsiSSBlocksOfType(n, SS.Helix | SS.Extended, minAlignmentLength);
    List<SSBlock> cBlocks = SecondaryStructure.GetPhiPsiSSBlocksOfType(c, SS.Helix | SS.Extended, minAlignmentLength);

    if (nBlocks.Count > 2)
    {
        alignments.RemoveAll(a => a.Range1.Start < nBlocks[1].End);
        alignments.RemoveAll(a => a.Range1.End < nBlocks[2].End - maxThirdHelixShortening);
    }

    if (cBlocks.Count > 2)
    {
        // Same rules as for n, counted from the far end of c.
        alignments.RemoveAll(a => a.Range2.End > cBlocks[cBlocks.Count - 2].Start);
        alignments.RemoveAll(a => a.Range2.Start > cBlocks[cBlocks.Count - 3].Start + maxThirdHelixShortening);
    }

    return alignments;
}
/// <summary>
/// Find splicing alignment ranges for two peptides that would retain a full secondary structure block for at least one of the two peptides. This prevents
/// short splice overlaps that remove some of the secondary structure on both sides of the alignment. Basically, the alignment must result in either:
/// 1) one secondary structure block being fully included in the other
/// 2) the N-terminal peptide being spliced has the end of its block fully in the resultant alignment, thus preserving the entire N-side block
/// 3) the C-terminal peptide being spliced has the start of its block fully in the resultant alignment, thus preserving the entire C-side block
/// </summary>
/// <param name="n">Peptide treated as the N-terminal side; its residue range becomes the first range of each alignment.</param>
/// <param name="c">Peptide treated as the C-terminal side; its residue range becomes the second range of each alignment.</param>
/// <param name="allowedTypes">Secondary structure types whose blocks are considered for alignment.</param>
/// <param name="minAlignmentLength">Minimum block length and minimum overlap length, in residues.</param>
/// <param name="maxRmsd">Overlaps whose N-CA-C RMSD exceeds this value are rejected.</param>
/// <returns>Every accepted overlap as a SequenceAlignment(nStart, nEnd, cStart, cEnd).</returns>
public static List<SequenceAlignment> GetAlignmentsPreservingFullSsBlocks(IChain n, IChain c, SS allowedTypes, int minAlignmentLength = DefaultMinAlignmentLength, float maxRmsd = DefaultRmsdThreshold)
{
    // TODO!!!: Debug why calling this function with peptides A,B vs B,A returns different numbers of alignments.
    List<SequenceAlignment> alignments = new List<SequenceAlignment>();
    List<SSBlock> nBlocks = SecondaryStructure.GetPhiPsiSSBlocksOfType(n, allowedTypes, minAlignmentLength).Where(block => block.Length >= minAlignmentLength).ToList();
    List<SSBlock> cBlocks = SecondaryStructure.GetPhiPsiSSBlocksOfType(c, allowedTypes, minAlignmentLength).Where(block => block.Length >= minAlignmentLength).ToList();

    foreach (SSBlock nBlock in nBlocks)
    {
        foreach (SSBlock cBlock in cBlocks)
        {
            // nAlignmentOfBlockC is the index in n that the first residue of cBlock is laid onto. Slide cBlock from the
            // position where it overlaps only the last minAlignmentLength residues of nBlock down to the position where
            // it overlaps only the first minAlignmentLength residues. The lower bound is inclusive (>=): with a strict
            // '>' the final offset, whose overlap is exactly minAlignmentLength long, was skipped even though the
            // equally short overlap at the other end was evaluated.
            for (int nAlignmentOfBlockC = nBlock.End - minAlignmentLength + 1; nAlignmentOfBlockC + cBlock.Length >= nBlock.Start + minAlignmentLength; nAlignmentOfBlockC--)
            {
                // Figure out the start and ends of the overlap region
                int nStart = Math.Max(nBlock.Start, nAlignmentOfBlockC);
                int nEnd = Math.Min(nBlock.End, nAlignmentOfBlockC + cBlock.Length - 1);
                int alignmentLength = nEnd - nStart + 1;
                // nStart is never below nAlignmentOfBlockC, so this offset into cBlock is never negative.
                int cStart = cBlock.Start + (nStart - nAlignmentOfBlockC);
                int cEnd = cStart + alignmentLength - 1;

                Debug.Assert(nBlock.Start <= nStart && nEnd <= nBlock.End);
                Debug.Assert(cBlock.Start <= cStart && cEnd <= cBlock.End);
                Debug.Assert(nEnd - nStart == cEnd - cStart);
                Debug.Assert(alignmentLength >= minAlignmentLength);

                bool cFullyIncluded = (cBlock.Start == cStart && cBlock.End == cEnd);
                bool nFullyIncluded = (nBlock.Start == nStart && nBlock.End == nEnd);
                bool cBlockStartKept = (cStart == cBlock.Start);
                bool nBlockEndKept = (nEnd == nBlock.End);

                // Reject overlaps that satisfy none of the three preservation cases listed in the summary.
                if (!(nFullyIncluded || cFullyIncluded || cBlockStartKept || nBlockEndKept))
                {
                    continue;
                }

                if (Rmsd.GetRmsdNCAC(n, nStart, nEnd, c, cStart, cEnd) > maxRmsd)
                {
                    continue;
                }

                alignments.Add(new SequenceAlignment(nStart, nEnd, cStart, cEnd));
            }
        }
    }

    return alignments;
}
/// <summary>
/// Slides every qualifying secondary structure block of <paramref name="chain2"/> along every qualifying block of
/// <paramref name="chain1"/> and records each overlap of at least <paramref name="minAlignmentLength"/> residues whose
/// RMSD does not exceed <paramref name="maxRmsd"/>. Each result carries the residue ranges in both chains, the N-CA-C
/// centroid of each range, the transform returned by Rmsd.GetRmsdAndTransform and that transform's inverse.
/// Unlike GetAlignmentsPreservingFullSsBlocks, no "a full block must be preserved" filter is applied here.
/// </summary>
/// <param name="chain1">First chain; its overlap range is the first range of each alignment.</param>
/// <param name="chain2">Second chain; its overlap range is the second range of each alignment.</param>
/// <param name="allowedTypes">Secondary structure types whose blocks are considered.</param>
/// <param name="minAlignmentLength">Minimum block length and minimum overlap length, in residues.</param>
/// <param name="maxRmsd">Overlaps with an RMSD greater than this value are skipped.</param>
/// <returns>One TransformSequenceAlignment per accepted overlap.</returns>
public static List<TransformSequenceAlignment> GetTransformAlignments(IChain chain1, IChain chain2, SS allowedTypes, int minAlignmentLength = DefaultMinAlignmentLength, float maxRmsd = DefaultRmsdThreshold)
{
    List<TransformSequenceAlignment> results = new List<TransformSequenceAlignment>();

    List<SSBlock> blocks1 = SecondaryStructure
        .GetPhiPsiSSBlocksOfType(chain1, allowedTypes, minAlignmentLength)
        .Where(b => b.Length >= minAlignmentLength)
        .ToList();
    List<SSBlock> blocks2 = SecondaryStructure
        .GetPhiPsiSSBlocksOfType(chain2, allowedTypes, minAlignmentLength)
        .Where(b => b.Length >= minAlignmentLength)
        .ToList();

    // Previously, calling this function with peptides A,B vs B,A returned different numbers of alignments. The original
    // intent was for the first chain passed in to be at the N-terminus and the second at the C-terminus (post-fusion),
    // so post-fusion lengths were being calculated here and culled differently based on order.
    foreach (SSBlock block1 in blocks1)
    {
        foreach (SSBlock block2 in blocks2)
        {
            // 'offset' is the index in chain1 onto which the first residue of block2 is placed. It runs from the
            // placement that overlaps only the tail of block1 down to the one that overlaps only its head.
            int highestOffset = block1.End - minAlignmentLength + 1;
            int lowestOffset = block1.Start + minAlignmentLength - block2.Length;

            for (int offset = highestOffset; offset >= lowestOffset; offset--)
            {
                // Overlap region expressed in chain1 indices, then mapped into chain2 indices.
                int start1 = Math.Max(block1.Start, offset);
                int end1 = Math.Min(block1.End, offset + block2.Length - 1);
                int overlapLength = end1 - start1 + 1;
                int start2 = block2.Start + Math.Max(0, start1 - offset);
                int end2 = start2 + overlapLength - 1;

                Debug.Assert(block1.Start <= start1 && end1 <= block1.End);
                Debug.Assert(block2.Start <= start2 && end2 <= block2.End);
                Debug.Assert(end1 - start1 == end2 - start2);
                Trace.Assert(overlapLength >= minAlignmentLength);

                float rmsd;
                Matrix transform = Rmsd.GetRmsdAndTransform(chain1, start1, end1, chain2, start2, end2, out rmsd);
                if (rmsd > maxRmsd)
                {
                    continue;
                }

                results.Add(new TransformSequenceAlignment(start1, end1, start2, end2)
                {
                    Centroid1 = Geometry.GetCenterNCAC(chain1[start1, end1]),
                    Centroid2 = Geometry.GetCenterNCAC(chain2[start2, end2]),
                    Align1 = transform,
                    Align2 = Matrix.Invert(transform)
                });
            }
        }
    }

    return results;
}