/// <summary>
/// Classifies the residues of a series of structures that are to be fused pairwise. For every alignment the residues
/// that span the splice (the junction) are gathered; for every structure, the residues cut away by splicing and the
/// residues that remain outside of any adjacent junction (the block) are gathered.
/// </summary>
/// <param name="structures">structures in fusion order; alignment i relates structure i to structure i + 1</param>
/// <param name="alignments">the aligned ranges and chain indices at which each consecutive pair is spliced</param>
/// <param name="ncDirections">per alignment: true when structure i provides the N-terminal side, false when structure i + 1 does</param>
/// <param name="blocks">per structure: all of its residues, minus spliced-away residues, minus residues of the adjacent junctions</param>
/// <param name="junctions">per alignment: N-side residues [Range1.Start, Range1.Middle] plus C-side residues [Range2.Middle + 1, Range2.End], after orienting the alignment N-to-C</param>
/// <param name="removed">per structure: those of its residues that are cut away by splicing</param>
public static void GetSelections(IStructure[] structures, SequenceAlignment[] alignments, bool[] ncDirections, out Selection[] blocks, out Selection[] junctions, out Selection[] removed)
{
    junctions = new Selection[alignments.Length];
    blocks = new Selection[structures.Length];
    removed = new Selection[structures.Length];

    // Residues are only marked here, never deleted, so that chain indices stay valid while
    // every alignment is being processed
    Selection discarded = new Selection();

    // Pass 1: per alignment, collect the junction and the residues lost to the splice
    for (int a = 0; a < alignments.Length; a++)
    {
        Selection splice = new Selection();

        // Orient everything so that "1" is the N-terminal side and "2" is the C-terminal side
        SequenceAlignment oriented;
        IStructure nSide;
        IStructure cSide;
        if (ncDirections[a])
        {
            oriented = alignments[a];
            nSide = structures[a];
            cSide = structures[a + 1];
        }
        else
        {
            oriented = SequenceAlignment.Reversed(alignments[a]);
            nSide = structures[a + 1];
            cSide = structures[a];
        }

        IChain nChain = nSide[oriented.ChainIndex1];
        IChain cChain = cSide[oriented.ChainIndex2];

        // The N-side chain loses everything after the middle of its range; the C-side chain
        // loses everything up to and including the middle of its range
        discarded.Aas.UnionWith(nChain[oriented.Range1.Middle + 1, nChain.Count - 1]);
        discarded.Aas.UnionWith(cChain[0, oriented.Range2.Middle]);

        // The junction is what is kept of the two aligned ranges
        splice.Aas.UnionWith(nChain[oriented.Range1.Start, oriented.Range1.Middle]);
        splice.Aas.UnionWith(cChain[oriented.Range2.Middle + 1, oriented.Range2.End]);
        junctions[a] = splice;
    }

    // Pass 2: per structure, everything that is neither discarded nor part of a neighbouring junction is a block
    for (int s = 0; s < structures.Length; s++)
    {
        Selection remaining = new Selection(structures[s].SelectMany(chain => chain));
        removed[s] = Selection.Intersect(remaining, discarded);
        remaining.ExceptWith(discarded);

        bool hasJunctionBefore = s > 0;
        bool hasJunctionAfter = s < junctions.Length;
        if (hasJunctionBefore)
        {
            remaining.ExceptWith(junctions[s - 1]);
        }
        if (hasJunctionAfter)
        {
            remaining.ExceptWith(junctions[s]);
        }

        blocks[s] = remaining;
    }
}
/// <summary>
/// Builds a new alignment from <paramref name="other"/> with its two sides exchanged: Range2 and ChainIndex2 are
/// passed in the first-side positions of the constructor, Range1 and ChainIndex1 in the second-side positions.
/// </summary>
/// <param name="other">the alignment whose sides are to be swapped</param>
/// <returns>a newly constructed alignment with sides 1 and 2 exchanged</returns>
public static SequenceAlignment Reversed(SequenceAlignment other)
{
    SequenceAlignment swapped = new SequenceAlignment(other.Range2, other.Range1, other.ChainIndex2, other.ChainIndex1);
    return swapped;
}
/// <summary>
/// Builds one fused chain out of consecutive peptides. For each pair (i, i + 1) the residues of peptide i that precede
/// the aligned range are copied, then one residue is chosen per aligned position from either peptide, and for the
/// final pair the residues of the last peptide that follow the aligned range are copied.
/// </summary>
/// <param name="peptides">chains in N-to-C order; alignments[i] splices peptides[i] (Range1) to peptides[i + 1] (Range2)</param>
/// <param name="alignments">one alignment per consecutive peptide pair; Range1 and Range2 are expected to have equal length</param>
/// <param name="immutableAas">residues that are taken unconditionally when they occur at an aligned position (peptide i is checked first)</param>
/// <returns>a new chain made of copies of the chosen residues; each copy's NodeTransform is set to its source's TotalTransform</returns>
public static IChain GetChain(IChain[] peptides, SequenceAlignment[] alignments, Selection immutableAas)
{
    IChain fusion = new Chain();

    // Do all pairwise analysis
    for (int i = 0; i < peptides.Length - 1; i++)
    {
        SequenceAlignment alignment = alignments[i];
        bool isLastPair = i == peptides.Length - 2;

        // Determine the ranges outside of the splice
        int start1 = i == 0 ? 0 : alignments[i - 1].Range2.End + 1;
        int end1 = alignment.Range1.Start - 1;
        int start2 = alignment.Range2.End + 1;
        int end2 = i < alignments.Length - 1 ? alignments[i + 1].Range1.Start - 1 : peptides[i + 1].Count - 1;

        // Add the non-overlapping region of the first peptide
        if (start1 <= end1)
        {
            // Tracks the index of each residue within peptides[i].
            // NOTE(review): assumes the range indexer enumerates residues in ascending index order starting at start1 - confirm.
            int sourceIndex = start1;
            foreach (Aa aa in peptides[i][start1, end1])
            {
                // Only residue 0 of the very first peptide is the N-terminus. Previously the flag was computed from
                // start1, which marked every residue of the leading segment as N-terminal.
                Aa copy = new Aa(aa, i == 0 && sourceIndex == 0, false);
                copy.NodeTransform = aa.TotalTransform;
                fusion.Add(copy);
                sourceIndex++;
            }
        }

        // Add the alignment region, selecting either from the first or second peptide so as to minimize clashes with
        // the sidechains that are for sure being kept on either side
        Debug.Assert(alignment.Range1.Length == alignment.Range2.Length);

        // Secondary structure of peptides[i]; computed on first use and reused for the remaining aligned positions.
        // NOTE(review): assumes GetPhiPsiSS has no side effects and depends only on its arguments - confirm.
        SS[] ss1 = null;

        for (int alignmentOffset = 0; alignmentOffset < alignment.Range1.Length; alignmentOffset++)
        {
            int index1 = alignment.Range1.Start + alignmentOffset;
            int index2 = alignment.Range2.Start + alignmentOffset;
            IAa option1 = peptides[i][index1];
            IAa option2 = peptides[i + 1][index2];
            bool nTerminus = i == 0 && index1 == 0;
            bool cTerminus = isLastPair && (index2 == peptides[i + 1].Count - 1);

            IAa chosen;
            if (immutableAas.Aas.Contains(option1))
            {
                chosen = option1;
            }
            else if (immutableAas.Aas.Contains(option2))
            {
                chosen = option2;
            }
            else
            {
                // Prefer the first peptide's residue over a proline from the second peptide when the five residues
                // of the first peptide ending at this position are helical.
                // NOTE(review): the test ORs the five SS values and compares with SS.Helix; whether that means
                // "all helical" depends on how the SS enum is defined - confirm.
                bool keepHelixResidue = false;
                if (option2.Letter == 'P' && index1 >= 4)
                {
                    if (ss1 == null)
                    {
                        ss1 = SecondaryStructure.GetPhiPsiSS(peptides[i], 5);
                    }
                    keepHelixResidue = (ss1[index1] | ss1[index1 - 1] | ss1[index1 - 2] | ss1[index1 - 3] | ss1[index1 - 4]) == SS.Helix;
                }

                if (keepHelixResidue)
                {
                    chosen = option1;
                }
                else
                {
                    // Otherwise, select the residue with fewer clashes: each candidate is tested against the residues
                    // that are kept from the *other* peptide's non-overlapping region. Ties go to the first peptide.
                    int clashCount1 = end2 >= start2
                        ? peptides[i + 1][start2, end2].Count(other => Clash.AnyContact(other, option1, Clash.ContactType.SidechainSidechainClash))
                        : 0;
                    int clashCount2 = end1 >= start1
                        ? peptides[i][start1, end1].Count(other => Clash.AnyContact(other, option2, Clash.ContactType.SidechainSidechainClash))
                        : 0;
                    chosen = clashCount1 <= clashCount2 ? option1 : option2;
                }
            }

            AppendResidueCopy(fusion, chosen, nTerminus, cTerminus);
        }

        // Add the non-overlapping region of the last peptide
        if (isLastPair && start2 <= end2)
        {
            foreach (Aa aa in peptides[i + 1][start2, end2])
            {
                Aa copy = new Aa(aa, false, aa.IsCTerminus);
                copy.NodeTransform = aa.TotalTransform;
                fusion.Add(copy);
            }
        }
    }
    return(fusion);
}

/// <summary>
/// Copies <paramref name="source"/> with the given terminus flags, sets the copy's NodeTransform to the source's
/// TotalTransform, and appends the copy to <paramref name="fusion"/>.
/// </summary>
private static void AppendResidueCopy(IChain fusion, IAa source, bool nTerminus, bool cTerminus)
{
    Aa copy = new Aa(source, nTerminus, cTerminus);
    copy.NodeTransform = source.TotalTransform;
    fusion.Add(copy);
}
/// <summary>
/// Outputs a structure that is a fusion of the chains indicated by the sequence alignments. In the case of a cycle (wherein the last structure
/// is a copy of the first at a different position), only one set of chains for the two endpoint structures is included.
/// The chains of the input structures are modified in place: residues are removed from them and chains are joined into one another.
/// </summary>
/// <param name="structures">structures to fuse; must contain exactly one more element than <paramref name="alignments"/></param>
/// <param name="alignments">the sequence positions at which to fuse; alignment i joins structures i and i + 1</param>
/// <param name="ncDirections">the directionality of the alignment: true -> the first structure is N-terminal, false -> C-terminal</param>
/// <param name="cycle">whether the structure forms a cycle, in which case the first and last fusions affect both ends</param>
/// <param name="partialChain">a partial chain has been created due to cyclization whose entirety will be restored only through asu patterning;
/// null unless the cycle's first and last fusion involve the same chain index and that chain ends up fused all the way through</param>
/// <returns>a new structure holding every distinct, non-empty chain that remains in the input structures after fusion</returns>
public static IStructure GetStructure(IStructure[] structures, SequenceAlignment[] alignments, bool[] ncDirections, bool cycle, out IChain partialChain)
{
    partialChain = null;

    // Note: The fusion product's middle residue from the alignment range comes from the N-terminal chain, i.e the N-term range is [0, Middle] and C-term is [Middle + 1, Len-1]
    Trace.Assert(structures != null && alignments != null && ncDirections != null);
    Trace.Assert(structures.Length > 0);
    Trace.Assert(structures.Length == alignments.Length + 1);
    Trace.Assert(alignments.Length == ncDirections.Length);

    // Data for the cyclization case - the chain that contains the fusion products of the first chain is tracked
    // so that the last chain can be fused back onto it
    IChain cycleDoubleFusedChain = null; // Track what chain the first fusion chain ends up in

    // Whether the first chain is fused both to the next chain and to the last/prior (wrap-around) chain.
    // The length check keeps a single structure (no alignments) from throwing in First()/Last().
    bool isCycleDoubleFused = cycle && alignments.Length > 0 && alignments.First().ChainIndex1 == alignments.Last().ChainIndex2;
    Matrix cycleAlignment = isCycleDoubleFused ? Rmsd.GetRmsdTransform(structures.First()[0][0], structures.Last()[0][0]) : Matrix.Identity;

    // Mark which aas to remove without actually removing them, so that their indices remain unchanged while
    // those to be removed are being computed
    Selection remove = new Selection();
    for (int alignmentIndex = 0; alignmentIndex < alignments.Length; alignmentIndex++)
    {
        bool ncDirection = ncDirections[alignmentIndex];
        SequenceAlignment alignment = alignments[alignmentIndex];
        SequenceAlignment ncAlignment = ncDirection ? alignment : SequenceAlignment.Reversed(alignment); // N as chain1
        IStructure structureN = ncDirection ? structures[alignmentIndex] : structures[alignmentIndex + 1];
        IStructure structureC = ncDirection ? structures[alignmentIndex + 1] : structures[alignmentIndex];
        IChain chainN = structureN[ncAlignment.ChainIndex1];
        IChain chainC = structureC[ncAlignment.ChainIndex2];
        remove.Aas.UnionWith(chainN[ncAlignment.Range1.Middle + 1, chainN.Count - 1]);
        remove.Aas.UnionWith(chainC[0, ncAlignment.Range2.Middle]);

        // If a cycle exists, then for each chain in the first and last structure (which are identical, modulo a transform), only one copy should be
        // preserved. If that copy is fused on both sides, then it must be trimmed according to both the first and final sequence alignments. In such a
        // case, remove the entire last structure.
        // Cycle and fusion is to the same chain on both sides:
        //   -> remove all of the second copy
        //   -> transform the second copy neighbor chain onto the first
        // Cycle and fusion is to separate chains on either side:
        //   -> remove all of the second copy except for the fused chain
        //   -> remove from the first copy the chain that was fused on the second copy
        if (cycle && alignmentIndex == alignments.Length - 1)
        {
            if (isCycleDoubleFused)
            {
                // Mark all chains from second copy for deletion
                foreach (IChain chain in structures[structures.Length - 1])
                {
                    remove.Aas.UnionWith(chain);
                }

                // Mark the first structure residues for deletion on one side of the fusion point
                IChain chain0 = structures[0][alignment.ChainIndex2];
                if (ncDirection)
                {
                    remove.Aas.UnionWith(chain0[0, alignment.Range2.Middle]);
                }
                else
                {
                    remove.Aas.UnionWith(chain0[alignment.Range2.Middle + 1, chain0.Count - 1]);
                }
            }
            else
            {
                // Mark all chains from the second copy for deletion, except the one being fused
                IChain keep = structures[alignmentIndex + 1][alignment.ChainIndex2];
                foreach (IChain chain in structures[alignmentIndex + 1])
                {
                    if (chain != keep)
                    {
                        remove.Aas.UnionWith(chain);
                    }
                }

                // For the one being fused, remove that index from the first structure
                remove.Aas.UnionWith(structures[0][alignment.ChainIndex2]);
            }
        }
    }

    // Remove them
    for (int alignmentIndex = 0; alignmentIndex < alignments.Length; alignmentIndex++)
    {
        SequenceAlignment alignment = alignments[alignmentIndex];
        IStructure structure1 = structures[alignmentIndex];
        IStructure structure2 = structures[alignmentIndex + 1];
        IChain chain1 = structure1[alignment.ChainIndex1];
        IChain chain2 = structure2[alignment.ChainIndex2];

        RemoveMarkedAas(chain1, remove);
        RemoveMarkedAas(chain2, remove);

        // In a cycle, residues may have been marked on chains of the endpoint structures that are not part of the
        // alignment itself, so every chain of those structures is swept
        if (cycle && alignmentIndex == 0)
        {
            foreach (IChain chain in structure1)
            {
                RemoveMarkedAas(chain, remove);
            }
        }

        if (cycle && alignmentIndex == alignments.Length - 1)
        {
            foreach (IChain chain in structure2)
            {
                RemoveMarkedAas(chain, remove);
            }
        }
    }

    // Join the chains and allocate the result to the second structure so as to preserve the indexes for the next fusion step
    for (int alignmentIndex = 0; alignmentIndex < alignments.Length; alignmentIndex++)
    {
        bool ncDirection = ncDirections[alignmentIndex];
        SequenceAlignment alignment = alignments[alignmentIndex];
        IStructure structure1 = structures[alignmentIndex];
        IStructure structure2 = structures[alignmentIndex + 1];
        IChain chain1 = structure1[alignment.ChainIndex1];
        IChain chain2 = structure2[alignment.ChainIndex2];

        if (ncDirection)
        {
#if DEBUG_MIRRORS
            foreach (IAa aa2 in chain2.ToArray())
            {
                chain1.AddInPlace(aa2);
            }
#else
            chain1.AddArraySourceInPlace(chain2);
#endif
            structure2[alignment.ChainIndex2, true] = chain1;
        }
        else
        {
#if DEBUG_MIRRORS
            foreach (IAa aa1 in chain1.ToArray())
            {
                chain2.AddInPlace(aa1);
            }
#else
            chain2.AddArraySourceInPlace(chain1);
#endif
        }

        // Track which chain contains the first chain in the cycle so that the final chain in the cycle can fuse to it
        if (isCycleDoubleFused && (alignmentIndex == 0 || chain1 == cycleDoubleFusedChain || chain2 == cycleDoubleFusedChain))
        {
            cycleDoubleFusedChain = ncDirection ? chain1 : chain2;
        }

        if (isCycleDoubleFused && alignmentIndex == alignments.Length - 1)
        {
            // If it's a cycle on the same chain, move the chain back to where it fuses to the first structure
            IChain combined = ncDirection ? chain1 : chain2;
            combined.Transform(cycleAlignment);

            if (combined == cycleDoubleFusedChain)
            {
                // If the structure[0] fusion chain has been fused all the way through, there is no need to move the
                // current chain to meet the first, since they're already joined
                partialChain = cycleDoubleFusedChain;
            }
            else
            {
                if (ncDirection)
                {
                    combined.AddArraySourceInPlace(cycleDoubleFusedChain);
                }
                else
                {
                    cycleDoubleFusedChain.AddArraySourceInPlace(combined);
                }
            }
        }
    }

    // Add all unique chains to a new structure
    Structure total = new Structure();
    structures.SelectMany(s => s).Distinct().Where(c => c.Count > 0).ToList().ForEach(c => total.AddInPlace(c));
    return(total);
}

/// <summary>
/// Removes from <paramref name="chain"/> every residue contained in <paramref name="remove"/>, walking from the last
/// index to the first so that removals do not shift the indices still to be visited. After each removal the residue
/// is transformed so that its TotalTransform equals what it was while it was still in the chain.
/// </summary>
private static void RemoveMarkedAas(IChain chain, Selection remove)
{
    for (int i = chain.Count - 1; i >= 0; i--)
    {
        IAa aa = chain[i];
        if (remove.Aas.Contains(aa))
        {
            Matrix desired = aa.TotalTransform;
            chain.RemoveAt(i);
            aa.Transform(desired * Matrix.Invert(aa.TotalTransform));
#if DEBUG_TRANSFORMS
            Debug.Assert((desired - aa.TotalTransform).Translation.Length() < 0.1);
#endif
        }
    }
}
/// <summary>
/// Fuses two chains at a single alignment by delegating to GetChain, optionally pinning residues by index.
/// </summary>
/// <param name="nChain">the chain passed to GetChain as the first peptide</param>
/// <param name="cChain">the chain passed to GetChain as the second peptide</param>
/// <param name="alignment">the single alignment passed to GetChain</param>
/// <param name="immutableResidues">optional; when given it must have exactly two elements: residue indices into
/// <paramref name="nChain"/> and into <paramref name="cChain"/>, respectively, that are passed on as immutable</param>
/// <returns>the chain returned by GetChain</returns>
public static IChain GetPeptideWithMinimizedClashes(IChain nChain, IChain cChain, SequenceAlignment alignment, IEnumerable <int>[] immutableResidues = null)
{
    Selection pinned = new Selection();

    if (immutableResidues != null)
    {
        Trace.Assert(immutableResidues.Length == 2);

        // Side 0 indexes into the N-terminal chain, side 1 into the C-terminal chain
        for (int side = 0; side < 2; side++)
        {
            IChain owner = side == 0 ? nChain : cChain;
            pinned.Aas.UnionWith(immutableResidues[side].Select(index => owner[index]));
        }
    }

    IChain[] pair = new IChain[] { nChain, cChain };
    SequenceAlignment[] single = new SequenceAlignment[] { alignment };
    return GetChain(pair, single, pinned);
}