/// <summary>
/// Finds maximal aligned ranges between two chains in which every position of both ranges has the
/// requested secondary structure and the value returned by Rmsd.GetRmsdNCAC for the two ranges does not
/// exceed <paramref name="maxRmsd"/>. Every relative offset between the two chains is tried; for each
/// offset, an alignment is grown from the minimum length until it fails, and the last passing alignment
/// is recorded unless its peptide1 range ends no later than one already recorded for the same offset.
/// </summary>
/// <param name="peptide1">First chain; its ranges are the first Range of each returned AlignmentInfo.</param>
/// <param name="peptide2">Second chain; its ranges are the second Range of each returned AlignmentInfo.</param>
/// <param name="secondaryStructure">Secondary structure value that every aligned position must equal.</param>
/// <param name="minimumAlignmentLength">Shortest alignment that is reported. Also forwarded to GetPhiPsiSS
/// (NOTE(review): presumably a minimum SS run length there - confirm against that helper).</param>
/// <param name="maxRmsd">Largest accepted Rmsd.GetRmsdNCAC value for an alignment.</param>
/// <returns>The alignments found, in the order discovered (by offset, then by peptide1 start index).</returns>
public static List<AlignmentInfo> GetSsAlignmentsByRmsd(Chain peptide1, Chain peptide2, SS secondaryStructure, int minimumAlignmentLength, float maxRmsd)
{
    List<AlignmentInfo> alignments = new List<AlignmentInfo>();
    List<SS> ss1 = Tools.SecondaryStructure.GetPhiPsiSS(peptide1, minimumAlignmentLength).ToList();
    List<SS> ss2 = Tools.SecondaryStructure.GetPhiPsiSS(peptide2, minimumAlignmentLength).ToList();

    // alignmentOffset2 maps a peptide1 index onto the aligned peptide2 index: index2 = index1 + alignmentOffset2.
    // For windows of minimumAlignmentLength to fit in both sequences, index1 lies in [0, ss1.Count - min] and
    // index2 in [0, ss2.Count - min], so the offset spans [min - ss1.Count, ss2.Count - min] inclusive.
    // (The bounds were previously swapped between ss1 and ss2 and exclusive at the top, which skipped valid
    // offsets whenever the two chains had different lengths.)
    int firstOffset2 = minimumAlignmentLength - ss1.Count;
    int lastOffset2 = ss2.Count - minimumAlignmentLength;

    for (int alignmentOffset2 = firstOffset2; alignmentOffset2 <= lastOffset2; alignmentOffset2++)
    {
        // Largest peptide1 end index recorded so far for this offset; used to skip alignments that are
        // subsets of one already recorded.
        int maxRangeEnd1Added = -1;

        // Last start index (inclusive) at which a minimum-length window still fits in both sequences.
        int lastRangeStart1 = Math.Min(ss1.Count - minimumAlignmentLength, ss2.Count - minimumAlignmentLength - alignmentOffset2);

        for (int rangeStart1 = Math.Max(0, -alignmentOffset2); rangeStart1 <= lastRangeStart1; rangeStart1++)
        {
            // Grow the alignment until the SS no longer matches over the entire alignment, the RMSD gets
            // too big, or either sequence terminates.
            int alignmentLength = minimumAlignmentLength;
            while (true)
            {
                int rangeStart2 = rangeStart1 + alignmentOffset2;
                int rangeEnd1 = rangeStart1 + alignmentLength - 1;
                int rangeEnd2 = rangeStart2 + alignmentLength - 1;

                bool allowableRangeExceeded = ss1.Count <= rangeEnd1 || ss2.Count <= rangeEnd2;

                // Only inspect the sequences when the ranges are in bounds. All() short-circuits on the
                // first mismatch.
                bool onlyContainsDesiredSs = !allowableRangeExceeded
                    && ss1.GetRange(rangeStart1, alignmentLength).All(ss => ss == secondaryStructure)
                    && ss2.GetRange(rangeStart2, alignmentLength).All(ss => ss == secondaryStructure);

                bool rmsdExceeded = !allowableRangeExceeded
                    && Rmsd.GetRmsdNCAC(peptide1, rangeStart1, rangeEnd1, peptide2, rangeStart2, rangeEnd2) > maxRmsd;

                if (!allowableRangeExceeded && !rmsdExceeded && onlyContainsDesiredSs)
                {
                    // Still a valid alignment: try one position longer.
                    alignmentLength++;
                    continue;
                }

                bool previousLengthSucceeded = alignmentLength > minimumAlignmentLength;

                if (previousLengthSucceeded && rangeEnd1 - 1 > maxRangeEnd1Added)
                {
                    // This alignment failed but the previous (one shorter) did not. Add the previous one,
                    // if and only if it is not a subset of an alignment already added for this offset.
                    AlignmentInfo alignment = new AlignmentInfo(peptide1, peptide2, new Range(rangeStart1, rangeEnd1 - 1), new Range(rangeStart2, rangeEnd2 - 1));
                    alignments.Add(alignment);
                    maxRangeEnd1Added = rangeEnd1 - 1;
                }

                // Where to restart:
                //  - RMSD exceeded: continue from rangeStart1 + 1, because dropping the first position
                //    gives a slightly different superposition that may still pass.
                //  - Failure at the minimum length: continue from rangeStart1 + 1.
                //  - SS mismatch or sequence end after at least one success: the failing position is
                //    rangeEnd1, and every window containing it fails too, so jump past it (the for-loop
                //    increment turns rangeEnd1 into rangeEnd1 + 1).
                if (previousLengthSucceeded && !rmsdExceeded)
                {
                    rangeStart1 = rangeEnd1;
                }
                break;
            }
        }
    }

    return alignments;
}
/// <summary>
/// Find splicing alignment ranges for two peptides that would retain a full secondary structure block for at least one of the two peptides. This prevents
/// short splice overlaps that remove some of the secondary structure on both sides of the alignment. Basically, the alignment must result in either:
/// 1) one secondary structure block being fully included in the other
/// 2) the N-terminal peptide being spliced has the end of its block fully in the resultant alignment, thus preserving the entire N-side block
/// 3) the C-terminal peptide being spliced has the start of its block fully in the resultant alignment, thus preserving the entire C-side block
/// </summary>
/// <param name="n">The N-terminal peptide of the splice.</param>
/// <param name="c">The C-terminal peptide of the splice.</param>
/// <param name="allowedTypes">Secondary structure type(s) passed to GetPhiPsiSSBlocksOfType to select candidate blocks.</param>
/// <param name="minAlignmentLength">Minimum block length considered and minimum overlap length of a reported alignment.</param>
/// <param name="maxRmsd">Alignments whose Rmsd.GetRmsdNCAC value exceeds this threshold are discarded.</param>
/// <returns>One SequenceAlignment (nStart, nEnd, cStart, cEnd; all inclusive indices) per accepted block placement.</returns>
public static List<SequenceAlignment> GetAlignmentsPreservingFullSsBlocks(IChain n, IChain c, SS allowedTypes, int minAlignmentLength = DefaultMinAlignmentLength, float maxRmsd = DefaultRmsdThreshold)
{
    // TODO!!!: Debug why calling this function with peptides A,B vs B,A returns different numbers of alignments.
    List<SequenceAlignment> alignments = new List<SequenceAlignment>();
    List<SSBlock> nBlocks = SecondaryStructure.GetPhiPsiSSBlocksOfType(n, allowedTypes, minAlignmentLength).Where(block => block.Length >= minAlignmentLength).ToList();
    List<SSBlock> cBlocks = SecondaryStructure.GetPhiPsiSSBlocksOfType(c, allowedTypes, minAlignmentLength).Where(block => block.Length >= minAlignmentLength).ToList();

    foreach (SSBlock nBlock in nBlocks)
    {
        foreach (SSBlock cBlock in cBlocks)
        {
            // nAlignmentOfBlockC is the index in n at which the first residue of cBlock is placed.
            // The first placement overlaps exactly the last minAlignmentLength residues of nBlock; the
            // block is then slid toward the N-terminus until the overlap with the start of nBlock would
            // drop below minAlignmentLength. The lower bound is inclusive so that the placement whose
            // overlap is exactly minAlignmentLength at the start of nBlock is tried too, mirroring the
            // first placement. (With an exclusive bound, two blocks that are both exactly
            // minAlignmentLength long were never aligned at all.)
            int firstPlacement = nBlock.End - minAlignmentLength + 1;
            int lastPlacement = nBlock.Start + minAlignmentLength - cBlock.Length;

            for (int nAlignmentOfBlockC = firstPlacement; nAlignmentOfBlockC >= lastPlacement; nAlignmentOfBlockC--)
            {
                // Figure out the start and end of the overlap region in n.
                int nStart = Math.Max(nBlock.Start, nAlignmentOfBlockC);
                int nEnd = Math.Min(nBlock.End, nAlignmentOfBlockC + cBlock.Length - 1);
                int alignmentLength = nEnd - nStart + 1;

                // Map the overlap back into c. nStart is never less than nAlignmentOfBlockC, so the shift
                // is non-negative and cStart never precedes cBlock.Start.
                int cStart = cBlock.Start + (nStart - nAlignmentOfBlockC);
                int cEnd = cStart + alignmentLength - 1;

                Debug.Assert(nBlock.Start <= nStart && nEnd <= nBlock.End);
                Debug.Assert(cBlock.Start <= cStart && cEnd <= cBlock.End);
                Debug.Assert(nEnd - nStart == cEnd - cStart);

                // Keep the placement only if the overlap reaches the end of the N-side block or begins at
                // the start of the C-side block. A block that is fully contained in the overlap always
                // satisfies one of these, so full inclusion needs no separate check.
                bool preservesNBlockEnd = nEnd == nBlock.End;
                bool preservesCBlockStart = cStart == cBlock.Start;
                if (!preservesNBlockEnd && !preservesCBlockStart)
                {
                    continue;
                }

                if (Rmsd.GetRmsdNCAC(n, nStart, nEnd, c, cStart, cEnd) > maxRmsd)
                {
                    continue;
                }

                alignments.Add(new SequenceAlignment(nStart, nEnd, cStart, cEnd));
            }
        }
    }

    return alignments;
}