/// <summary>
/// Builds the interface report for the "new data set".
/// For every record loaded from <c>c:\pdb\new_data_set.csv</c> the atomic contacts between the two
/// interacting chains of each partner PDB are loaded, the best window of at most 50 residue numbers
/// is located on each of the four chains, and one CSV row per record is written to
/// <c>c:\pdb\new_data_set_results.csv</c>.
/// </summary>
/// <param name="args">Command line arguments (not used).</param>
static void Main(string[] args)
{
    const int interfaceTargetLength = 50;

    // The indexes of data, contactsPartner1 and contactsPartner2 all match.
    var data = MultiBindingInterface.Load(@"c:\pdb\new_data_set.csv");

    var contactsPartner1 = data.Select(entry =>
    {
        var chain1 = entry.InteractionChainsPdb1Chain1.ToUpperInvariant();
        var chain2 = entry.InteractionChainsPdb1Chain2.ToUpperInvariant();

        // Keep only the contacts that connect the two chains of interest, in either orientation.
        var contacts = FindAtomicContacts.AtomPair.LoadAtomPairList(@"C:\pdb\new_data_set\contacts\contacts_" + entry.InteractionChainsPdb1.ToLowerInvariant() + ".pdb")
            .Where(pair =>
                (pair.Atom1.chainID.FieldValue.ToUpperInvariant() == chain1 && pair.Atom2.chainID.FieldValue.ToUpperInvariant() == chain2) ||
                (pair.Atom1.chainID.FieldValue.ToUpperInvariant() == chain2 && pair.Atom2.chainID.FieldValue.ToUpperInvariant() == chain1))
            .ToList();

        // SwapAtoms is called whenever Atom1 sits on chain 2 - presumably so that Atom1 always
        // ends up on chain 1 afterwards (confirm against AtomPair.SwapAtoms).
        foreach (var contact in contacts)
        {
            if (contact.Atom1.chainID.FieldValue.ToUpperInvariant() == chain2)
            {
                contact.SwapAtoms();
            }
        }

        return contacts;
    }).ToList();

    var contactsPartner2 = data.Select(entry =>
    {
        var chain1 = entry.InteractionChainsPdb2Chain1.ToUpperInvariant();
        var chain2 = entry.InteractionChainsPdb2Chain2.ToUpperInvariant();

        var contacts = FindAtomicContacts.AtomPair.LoadAtomPairList(@"C:\pdb\new_data_set\contacts\contacts_" + entry.InteractionChainsPdb2.ToLowerInvariant() + ".pdb")
            .Where(pair =>
                (pair.Atom1.chainID.FieldValue.ToUpperInvariant() == chain1 && pair.Atom2.chainID.FieldValue.ToUpperInvariant() == chain2) ||
                (pair.Atom1.chainID.FieldValue.ToUpperInvariant() == chain2 && pair.Atom2.chainID.FieldValue.ToUpperInvariant() == chain1))
            .ToList();

        foreach (var contact in contacts)
        {
            if (contact.Atom1.chainID.FieldValue.ToUpperInvariant() == chain2)
            {
                contact.SwapAtoms();
            }
        }

        return contacts;
    }).ToList();

    var resultData = new List<string>();

    // Header row. The second "interface end" column belongs to p1c2 (it was mislabelled p1c1).
    resultData.Add(string.Join(",", new string[]
    {
        "super family",
        "partner 1 pdb id",
        "partner 1 chain id 1", "p1c1 interface start", "p1c1 interface end", "p1c1 best 50 start", "p1c1 best 50 end", "p1c1 best 50 interactions", "p1c1 interface seq", "p1c1 interface mask",
        "partner 1 chain id 2", "p1c2 interface start", "p1c2 interface end", "p1c2 best 50 start", "p1c2 best 50 end", "p1c2 best 50 interactions", "p1c2 interface seq", "p1c2 interface mask",
        "partner 2 pdb id",
        "partner 2 chain id 1", "p2c1 interface start", "p2c1 interface end", "p2c1 best 50 start", "p2c1 best 50 end", "p2c1 best 50 interactions", "p2c1 interface seq", "p2c1 interface mask",
        "partner 2 chain id 2", "p2c2 interface start", "p2c2 interface end", "p2c2 best 50 start", "p2c2 best 50 end", "p2c2 best 50 interactions", "p2c2 interface seq", "p2c2 interface mask",
    }));

    for (var index = 0; index < data.Count; index++)
    {
        var record = data[index];
        var partner1Contacts = contactsPartner1[index];
        var partner2Contacts = contactsPartner2[index];

        if (record == null || partner1Contacts == null || partner2Contacts == null)
        {
            continue;
        }

        // A record is only reported when both partners have at least one contact.
        if (partner1Contacts.Count == 0 || partner2Contacts.Count == 0)
        {
            continue;
        }

        // One entry per reported chain, in output order:
        // partner 1 chain 1, partner 1 chain 2, partner 2 chain 1, partner 2 chain 2.
        var pdbIds = new string[]
        {
            record.InteractionChainsPdb1, record.InteractionChainsPdb1,
            record.InteractionChainsPdb2, record.InteractionChainsPdb2,
        };
        var chainIds = new string[]
        {
            record.InteractionChainsPdb1Chain1, record.InteractionChainsPdb1Chain2,
            record.InteractionChainsPdb2Chain1, record.InteractionChainsPdb2Chain2,
        };
        var contactResSeqs = new List<int>[]
        {
            partner1Contacts.Select(pair => int.Parse(pair.Atom1.resSeq.FieldValue)).ToList(),
            partner1Contacts.Select(pair => int.Parse(pair.Atom2.resSeq.FieldValue)).ToList(),
            partner2Contacts.Select(pair => int.Parse(pair.Atom1.resSeq.FieldValue)).ToList(),
            partner2Contacts.Select(pair => int.Parse(pair.Atom2.resSeq.FieldValue)).ToList(),
        };

        var chainColumns = new List<List<string>>();
        for (var chain = 0; chain < chainIds.Length; chain++)
        {
            var resSeq = contactResSeqs[chain];
            var chainStructure = FindAtomicContacts.PdbAtomicChains(@"c:\pdb\new_data_set\" + pdbIds[chain] + ".pdb", new string[] { chainIds[chain] }, -1, -1, true);

            var window = FindBestContactWindow(resSeq, interfaceTargetLength);
            var windowStart = window.Item1;
            var windowEnd = window.Item2;

            // One-letter codes of the chain atoms whose residue number falls inside the window,
            // ordered by residue number.
            var sequence = string.Join("", chainStructure.ChainList.First().AtomList
                .Where(atom =>
                {
                    var number = int.Parse(atom.resSeq.FieldValue);
                    return number >= windowStart && number <= windowEnd;
                })
                .OrderBy(atom => int.Parse(atom.resSeq.FieldValue))
                .Select(atom => AminoAcidConversions.AminoAcidNameToCode1L(atom.resName.FieldValue))
                .ToList());

            var mask = BuildContactMask(resSeq, windowStart, sequence.Length);

            chainColumns.Add(new List<string>
            {
                chainIds[chain],
                resSeq.Min().ToString(),   // interface start: lowest contacting residue number
                resSeq.Max().ToString(),   // interface end: highest contacting residue number
                windowStart.ToString(),
                windowEnd.ToString(),
                window.Item3.ToString(),
                sequence,
                mask,
            });
        }

        var row = new List<string> { record.DomainSuperFamily, record.InteractionChainsPdb1 };
        row.AddRange(chainColumns[0]);
        row.AddRange(chainColumns[1]);
        row.Add(record.InteractionChainsPdb2);
        row.AddRange(chainColumns[2]);
        row.AddRange(chainColumns[3]);

        resultData.Add(string.Join(",", row.ToArray()));
    }

    File.WriteAllLines(@"c:\pdb\new_data_set_results.csv", resultData);
}

/// <summary>
/// Finds the window of at most <paramref name="targetLength"/> residue numbers that contains the
/// most entries of <paramref name="contactResSeq"/>.
/// </summary>
/// <param name="contactResSeq">Residue numbers of the contacts (one entry per contact); must not be empty.</param>
/// <param name="targetLength">Maximum distance between the first and last residue number of the window.</param>
/// <returns>
/// (window start, window end, number of contacts inside the window). When the contacts already span
/// no more than <paramref name="targetLength"/>, the full span and the total contact count are returned.
/// When more than two windows tie for the highest count, the middle one of the tied windows is returned;
/// otherwise the first window that reached the highest count.
/// </returns>
private static Tuple<int, int, int> FindBestContactWindow(List<int> contactResSeq, int targetLength)
{
    var lowest = contactResSeq.Min();
    var highest = contactResSeq.Max();

    if (highest - lowest <= targetLength)
    {
        return new Tuple<int, int, int>(lowest, highest, contactResSeq.Count);
    }

    var bestCount = int.MinValue;
    var bestWindows = new List<Tuple<int, int, int>>();

    for (var start = lowest - targetLength; start <= highest; start++)
    {
        var end = Math.Min(start + targetLength, highest);

        // Count the contacts inside the candidate window itself (the previous code counted inside
        // the best window found so far, so candidates were never actually evaluated).
        var count = contactResSeq.Count(r => r >= start && r <= end);

        if (count > bestCount)
        {
            bestWindows.Clear();
            bestWindows.Add(new Tuple<int, int, int>(start, end, count));
            bestCount = count;
        }
        else if (count == bestCount)
        {
            bestWindows.Add(new Tuple<int, int, int>(start, end, count));
        }

        // The window has reached the last contact; sliding further only shrinks it.
        if (start + targetLength >= highest)
        {
            break;
        }
    }

    return bestWindows.Count > 2 ? bestWindows[bestWindows.Count / 2] : bestWindows[0];
}

/// <summary>
/// Builds a mask of <paramref name="length"/> characters in which position <c>i</c> is <c>'X'</c>
/// when residue number <c>windowStart + i</c> occurs in <paramref name="contactResSeq"/> and
/// <c>'_'</c> otherwise.
/// </summary>
/// <remarks>
/// NOTE(review): the mask lines up with the interface sequence only if the window contributes exactly
/// one character per consecutive residue number - confirm against the atom lists being used.
/// </remarks>
private static string BuildContactMask(List<int> contactResSeq, int windowStart, int length)
{
    var contacts = new HashSet<int>(contactResSeq);
    var mask = new char[length];

    for (var i = 0; i < length; i++)
    {
        mask[i] = contacts.Contains(windowStart + i) ? 'X' : '_';
    }

    return new string(mask);
}
/// <summary>
/// Builds the homolog interface report for the ds96ub data set.
/// Reads every <c>*.pdb</c> file in <c>C:\ds96ub_homologs\</c>, finds for each chain and each chain it
/// contacts the best window of at most 50 residue numbers, pairs every interface with the reverse
/// interfaces of its contact chain, groups the interfaces of each homolog cluster by sequence
/// similarity and writes the result to <c>c:\ds96ub_homologs\ds96ub_homologs_interfaces.csv</c>.
/// </summary>
/// <param name="args">Command line arguments (not used).</param>
static void Main(string[] args)
{
    const int interfaceTargetLength = 50;

    var pdbFolder = @"C:\ds96ub_homologs\";
    var homologClusterData = FindHomologsCluster.FindHomologsCluster.HomologClusterData.Load(@"c:\ds96ub_homologs\ds96ub_homologs_0.7.csv");
    var pdbFiles = Directory.GetFiles(pdbFolder, "*.pdb", SearchOption.TopDirectoryOnly);
    var pdbIdList = pdbFiles.Select(FindAtomicContacts.PdbIdFromPdbFilename).ToList();

    // Keep only the ATOM lines with "CA" at columns 13-14 and the ENDMDL markers.
    // The length guard protects the column lookups against truncated ATOM lines.
    var pdbAtomsText = pdbFiles.Select(
        file => File.ReadAllLines(file)
            .Where(line => (line.StartsWith("ATOM ", StringComparison.Ordinal) && line.Length > 14 && line[13] == 'C' && line[14] == 'A') ||
                           line.StartsWith("ENDMDL ", StringComparison.Ordinal))
            .ToList()).ToList();

    // Only the first NMR model: keep everything before the first ENDMDL marker.
    // (GetRange takes a count, so the count is the marker index itself; "index - 1" dropped the
    // last atom of the model and threw when the marker was the first kept line.)
    pdbAtomsText = pdbAtomsText.Select(lines =>
    {
        var firstModelEnd = lines.FindIndex(line => line.StartsWith("ENDMDL ", StringComparison.Ordinal));
        return firstModelEnd == -1 ? lines : lines.GetRange(0, firstModelEnd);
    }).ToList();

    var pdbAtoms = pdbAtomsText.Select(lines => lines.Select(line => new ATOM_Record(line)).ToList()).ToList();

    // Unique chain ids per pdb file; the outer list stays index-aligned with pdbIdList.
    var pdbChainIds = pdbAtoms
        .Select(atoms => atoms.Select(atom => char.ToUpperInvariant(atom.chainID.FieldValue[0])).Distinct().ToList())
        .ToList();

    var pdbIdChainIdList = new List<Tuple<string, char>>();
    for (var i = 0; i < pdbIdList.Count; i++)
    {
        pdbIdChainIdList.AddRange(pdbChainIds[i].Select(id => new Tuple<string, char>(pdbIdList[i], id)));
    }

    pdbIdChainIdList = pdbIdChainIdList.Distinct().ToList();

    // For each (pdb, chain): every contact that touches the chain.
    var pdbContacts = pdbIdChainIdList.Select(pdbChain =>
    {
        var contacts = FindAtomicContacts.AtomPair.LoadAtomPairList(@"C:\ds96ub_homologs\contacts\contacts_pdb" + pdbChain.Item1.ToUpperInvariant() + ".pdb")
            .Where(pair => char.ToUpperInvariant(pair.Atom1.chainID.FieldValue[0]) == pdbChain.Item2 ||
                           char.ToUpperInvariant(pair.Atom2.chainID.FieldValue[0]) == pdbChain.Item2)
            .ToList();

        // SwapAtoms is called whenever Atom1 is not on the chain of interest - presumably so that
        // Atom1 always ends up on that chain afterwards (confirm against AtomPair.SwapAtoms).
        foreach (var contact in contacts)
        {
            if (char.ToUpperInvariant(contact.Atom1.chainID.FieldValue[0]) != pdbChain.Item2)
            {
                contact.SwapAtoms();
            }
        }

        return contacts;
    }).ToList();

    // One Ds96UbInterface per (pdb, chain, contacted chain).
    var pdbInterfaces = new List<Ds96UbInterface>();
    for (var index = 0; index < pdbContacts.Count; index++)
    {
        var pdbId = pdbIdChainIdList[index].Item1;
        var chainId = pdbIdChainIdList[index].Item2;
        var chainContacts = pdbContacts[index];

        if (chainContacts.Count == 0)
        {
            continue;
        }

        var fileAtoms = pdbAtoms[pdbIdList.IndexOf(pdbId)];

        var contactChains = chainContacts
            .Where(pair => char.ToUpperInvariant(pair.Atom2.chainID.FieldValue[0]) != chainId)
            .Select(pair => char.ToUpperInvariant(pair.Atom2.chainID.FieldValue[0]))
            .Distinct()
            .ToList();

        foreach (var contactChain in contactChains)
        {
            var resSeq = chainContacts
                .Where(pair => char.ToUpperInvariant(pair.Atom1.chainID.FieldValue[0]) == chainId &&
                               char.ToUpperInvariant(pair.Atom2.chainID.FieldValue[0]) == contactChain)
                .Select(pair => int.Parse(pair.Atom1.resSeq.FieldValue))
                .ToList();

            // Nothing to report (and Min/Max would throw) when no contact has Atom1 on this chain.
            if (resSeq.Count == 0)
            {
                continue;
            }

            var window = FindBestInterfaceWindow(resSeq, interfaceTargetLength);
            var windowStart = window.Item1;
            var windowEnd = window.Item2;

            // One-letter codes of this chain's atoms inside the window, ordered by residue number.
            var sequence = string.Join("", fileAtoms
                .Where(atom =>
                {
                    var number = int.Parse(atom.resSeq.FieldValue);
                    var atomChain = char.ToUpperInvariant(atom.chainID.FieldValue[0]);
                    return atomChain == chainId && number >= windowStart && number <= windowEnd;
                })
                .OrderBy(atom => int.Parse(atom.resSeq.FieldValue))
                .Select(atom => AminoAcidConversions.AminoAcidNameToCode1L(atom.resName.FieldValue))
                .ToList());

            var mask = BuildInterfaceMask(resSeq, windowStart, sequence.Length);

            pdbInterfaces.Add(new Ds96UbInterface(pdbId, chainId, contactChain, resSeq.Min(), resSeq.Max(), windowStart, windowEnd, window.Item3, sequence, mask, -1, "", "", 0, -1, "", "", 0));
        }
    }

    var homologClusterIndexes = homologClusterData.Select(h => h.ClusterIndex).Distinct().ToList();
    var homologClusters = homologClusterIndexes.Select(ci => homologClusterData.Where(h => h.ClusterIndex == ci).ToList()).ToList();

    // pdbInterfacesSorted[i] holds the interfaces of the cluster whose index is homologClusterIndexes[i].
    var pdbInterfacesSorted = homologClusters.Select(
        members => pdbInterfaces.Where(
            inf => members.Any(
                m => m.PdbId == inf.PdbId &&
                     (char.ToUpperInvariant(m.ChainId[0]) == inf.ChainId1 || char.ToUpperInvariant(m.ChainId[0]) == inf.ChainId2))).ToList()).ToList();

    var outputData = new List<string>();

    // Walk the clusters by position: the lists above are aligned by position, not by
    // "cluster index - 1", which only worked when the indexes were exactly 1..n in order.
    for (var clusterPosition = 0; clusterPosition < homologClusterIndexes.Count; clusterPosition++)
    {
        var clusterIndex = homologClusterIndexes[clusterPosition];
        var cluster = pdbInterfacesSorted[clusterPosition];

        // Each entry is a chain1 --> chain2 interface; attach the reverse (chain2 --> ...) interfaces
        // of the same pdb, best overlap first, as partner 1 and partner 2.
        foreach (var current in cluster)
        {
            var partnerCandidates = cluster
                .Where(other => other != current && other.PdbId == current.PdbId && current.ChainId2 == other.ChainId1)
                .OrderByDescending(other => InterfaceOverlap(current.MinResSeq, current.MaxResSeq, other.MinResSeq, other.MaxResSeq))
                .ToList();

            var first = partnerCandidates.FirstOrDefault();
            if (first != null)
            {
                current.Partner1InterfaceAminoAcids = first.InterfaceAminoAcids;
                current.Partner1InterfaceInteractionsMask = first.InterfaceInteractionsMask;
                current.Partner1InterfaceOverlap = InterfaceOverlap(current.MinResSeq, current.MaxResSeq, first.MinResSeq, first.MaxResSeq);
            }

            var second = partnerCandidates.ElementAtOrDefault(1);
            if (second != null)
            {
                current.Partner2InterfaceAminoAcids = second.InterfaceAminoAcids;
                current.Partner2InterfaceInteractionsMask = second.InterfaceInteractionsMask;
                current.Partner2InterfaceOverlap = InterfaceOverlap(current.MinResSeq, current.MaxResSeq, second.MinResSeq, second.MaxResSeq);
            }
        }

        cluster = cluster.Where(inf => inf.Partner1InterfaceOverlap > 0 || inf.Partner2InterfaceOverlap > 0).ToList();
        cluster = cluster.Where(inf => inf.InterfaceAminoAcids.Length >= 5).ToList();

        var clusterInterfaces = cluster.Select(inf => inf.InterfaceAminoAcids).ToList();

        // Group the interface sequences: every sequence joins the group of its most similar other sequence.
        var homologInterfaces = new List<List<string>>();
        foreach (var interfaceSeq in clusterInterfaces)
        {
            var highestScore = decimal.MinValue;
            string mostSimilar = null; // stays null when the cluster holds no different sequence

            foreach (var otherSeq in clusterInterfaces)
            {
                if (interfaceSeq == otherSeq)
                {
                    continue;
                }

                var score = FindAtomicContacts.AlignedSequenceSimilarityPercentage(interfaceSeq, otherSeq, FindAtomicContacts.AlignmentType.NMW);
                if (score.Item1 > highestScore)
                {
                    highestScore = score.Item1;
                    mostSimilar = otherSeq;
                }
            }

            var existing = homologInterfaces.FirstOrDefault(bucket => bucket.Contains(interfaceSeq) || bucket.Contains(mostSimilar));
            if (existing != null)
            {
                if (!existing.Contains(interfaceSeq))
                {
                    existing.Add(interfaceSeq);
                }

                if (!existing.Contains(mostSimilar))
                {
                    existing.Add(mostSimilar);
                }
            }
            else
            {
                homologInterfaces.Add(new List<string> { interfaceSeq, mostSimilar });
            }
        }

        foreach (var member in cluster)
        {
            member.Partner1ClusterIndex = homologInterfaces.FindIndex(bucket => bucket.Contains(member.Partner1InterfaceAminoAcids));
            member.Partner2ClusterIndex = homologInterfaces.FindIndex(bucket => bucket.Contains(member.Partner2InterfaceAminoAcids));
        }

        for (var groupIndex = 0; groupIndex < homologInterfaces.Count; groupIndex++)
        {
            var homologInterface = homologInterfaces[groupIndex];

            var ordered = cluster
                .Where(inf => homologInterface.Contains(inf.InterfaceAminoAcids))
                .OrderBy(inf => inf.Partner1ClusterIndex)
                .ThenBy(inf => inf.Partner2ClusterIndex)
                .ThenBy(inf => inf.InterfaceAminoAcids)
                .ThenBy(inf => inf.Partner1InterfaceAminoAcids)
                .ThenBy(inf => inf.Partner2InterfaceAminoAcids)
                .ToList();

            // Report each (interface, partner 1, partner 2) sequence combination once,
            // represented by the first interface that carries it.
            var distinctKeys = ordered
                .Select(inf => new Tuple<string, string, string>(inf.InterfaceAminoAcids, inf.Partner1InterfaceAminoAcids, inf.Partner2InterfaceAminoAcids))
                .Distinct()
                .ToList();

            var rows = distinctKeys
                .Select(key => ordered.FirstOrDefault(
                    inf => inf.InterfaceAminoAcids == key.Item1 &&
                           inf.Partner1InterfaceAminoAcids == key.Item2 &&
                           inf.Partner2InterfaceAminoAcids == key.Item3))
                .ToList();

            outputData.Add("cluster " + clusterIndex + "." + groupIndex);
            outputData.AddRange(rows.Select(inf => inf.ToString()).ToList());
            outputData.Add("");
        }
    }

    File.WriteAllLines(@"c:\ds96ub_homologs\ds96ub_homologs_interfaces.csv", outputData);
}

/// <summary>
/// Finds the window of at most <paramref name="targetLength"/> residue numbers that contains the
/// most entries of <paramref name="contactResSeq"/>.
/// </summary>
/// <param name="contactResSeq">Residue numbers of the contacts (one entry per contact); must not be empty.</param>
/// <param name="targetLength">Maximum distance between the first and last residue number of the window.</param>
/// <returns>
/// (window start, window end, number of contacts inside the window). When the contacts already span
/// no more than <paramref name="targetLength"/>, the full span and the total contact count are returned.
/// When more than two windows tie for the highest count, the middle one of the tied windows is returned;
/// otherwise the first window that reached the highest count.
/// </returns>
private static Tuple<int, int, int> FindBestInterfaceWindow(List<int> contactResSeq, int targetLength)
{
    var lowest = contactResSeq.Min();
    var highest = contactResSeq.Max();

    if (highest - lowest <= targetLength)
    {
        return new Tuple<int, int, int>(lowest, highest, contactResSeq.Count);
    }

    var bestCount = int.MinValue;
    var bestWindows = new List<Tuple<int, int, int>>();

    for (var start = lowest - targetLength; start <= highest; start++)
    {
        var end = Math.Min(start + targetLength, highest);

        // Count the contacts inside the candidate window itself (the previous code counted inside
        // the best window found so far, so candidates were never actually evaluated).
        var count = contactResSeq.Count(r => r >= start && r <= end);

        if (count > bestCount)
        {
            bestWindows.Clear();
            bestWindows.Add(new Tuple<int, int, int>(start, end, count));
            bestCount = count;
        }
        else if (count == bestCount)
        {
            bestWindows.Add(new Tuple<int, int, int>(start, end, count));
        }

        // Stop once the window reaches the last contact. The previous code compared against the
        // clamped window end, which is always reached, so the search stopped after one step.
        if (start + targetLength >= highest)
        {
            break;
        }
    }

    return bestWindows.Count > 2 ? bestWindows[bestWindows.Count / 2] : bestWindows[0];
}

/// <summary>
/// Builds a mask of <paramref name="length"/> characters in which position <c>i</c> is <c>'X'</c>
/// when residue number <c>windowStart + i</c> occurs in <paramref name="contactResSeq"/> and
/// <c>'_'</c> otherwise.
/// </summary>
/// <remarks>
/// NOTE(review): the mask lines up with the interface sequence only if the window contributes exactly
/// one character per consecutive residue number - confirm against the atom lists being used.
/// </remarks>
private static string BuildInterfaceMask(List<int> contactResSeq, int windowStart, int length)
{
    var contacts = new HashSet<int>(contactResSeq);
    var mask = new char[length];

    for (var i = 0; i < length; i++)
    {
        mask[i] = contacts.Contains(windowStart + i) ? 'X' : '_';
    }

    return new string(mask);
}