/// <summary>
/// Prepares the working directory <c>c:\pdbe_split\receptors\{id}\</c> for one receptor chain.
/// Loads the IBIS records for the id, keeps the "PPI" records whose PDB evidence is a
/// well-formed pair of two different five-character ids, passes every evidence complex to
/// <c>TruncateDimerContacts</c>, writes the tab-separated receptor/ligand list, calls
/// <c>PymolAlignmentCmds</c> and writes five numbered copies of the PyMOL batch template.
/// </summary>
/// <param name="mainReceptorPdbId">
/// Five-character id: the first four characters select the PDB/IBIS file, the id as a whole
/// is matched (upper-cased) against the IBIS query column. A null id or an id of any other
/// length makes the method return without doing anything.
/// </param>
public static void LoadReceptor(string mainReceptorPdbId)
{
    Debug.WriteLine(mainReceptorPdbId);

    // Null is rejected together with wrong-length ids instead of failing on .Length.
    if (mainReceptorPdbId == null || mainReceptorPdbId.Length != 5)
    {
        return;
    }

    mainReceptorPdbId = mainReceptorPdbId.ToUpperInvariant();

    // IBIS files are sharded into sub-directories named after characters 2-3 of the PDB code.
    var ibisFile = @"c:\phd\ibis\" + mainReceptorPdbId.Substring(1, 2) + @"\" + mainReceptorPdbId.Substring(0, 4) + ".txt";

    var ibisData = IbisData.Load(ibisFile);
    if (ibisData == null || ibisData.Count == 0)
    {
        return;
    }

    ibisData = ibisData
        // mainReceptorPdbId is already upper-cased above, so only the record side needs it.
        .Where(a => a.Query.ToUpperInvariant() == mainReceptorPdbId)
        .Where(a => a.Interaction_type == "PPI")
        .Where(a =>
        {
            var evidenceParts = a.PDB_Evidence.Trim().Split('_');

            // Evidence without an underscore has no ligand part; skip it rather than index past the array.
            if (evidenceParts.Length < 2)
            {
                return false;
            }

            var rec = evidenceParts[0];
            var lig = evidenceParts[1];
            return rec.Length == 5 && lig.Length == 5 && rec != lig;
        })
        .ToList();

    const int minLigands = 2;
    const int maxLigands = 400;
    if (ibisData.Count < minLigands || ibisData.Count > maxLigands)
    {
        return;
    }

    var receptorDir = @"c:\pdbe_split\receptors\" + mainReceptorPdbId + @"\";
    Directory.CreateDirectory(receptorDir);

    // The evidence string is trimmed here because the filter above validated the trimmed form;
    // the chain characters below are read by fixed index and would be shifted by leading whitespace.
    // Residue tokens keep only their ASCII digits (so "45A" becomes 45); empty tokens produced by
    // repeated spaces and tokens without any digit are skipped instead of reaching int.Parse("").
    // NOTE(review): a leading '-' is dropped as well, so negative residue numbers lose their sign.
    var pdbEvidenceList = ibisData
        .Select(a => new Tuple<string, List<int>>(
            a.PDB_Evidence.Trim(),
            a.PDB_Residue_No.Trim()
                .Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries)
                .Select(token => new string(token.Where(c => c >= '0' && c <= '9').ToArray()))
                .Where(digits => digits.Length > 0)
                .Select(digits => int.Parse(digits))
                .ToList()))
        .ToList();

    var pdbsumInteractonList = pdbEvidenceList
        .Select(a =>
        {
            var origPdbFile = @"c:\pdbe\" + a.Item1.Substring(0, 4) + ".pdb";

            // Evidence layout is "RRRRc_LLLLc": index 4 is the receptor chain, index 10 the ligand chain.
            var recChain = a.Item1[4];
            var ligChain = a.Item1[10];
            var outPdb = PdbRecLigFile(mainReceptorPdbId, a.Item1, -1);
            var pdbsumInteractions = TruncateDimerContacts(origPdbFile, recChain, ligChain, outPdb);
            return new Tuple<string, List<int>, List<int>>(a.Item1, pdbsumInteractions, a.Item2);
        })
        // Entries for which TruncateDimerContacts returned null are dropped.
        .Where(a => a.Item2 != null)
        .ToList();

    File.WriteAllLines(
        receptorDir + "rec_lig_list_" + mainReceptorPdbId + ".txt",
        pdbsumInteractonList.Select(a => string.Join("\t", new List<string>
        {
            a.Item1,
            string.Join("+", a.Item2),
            string.Join("+", a.Item3)
        })));

    PymolAlignmentCmds(mainReceptorPdbId, pdbsumInteractonList, receptorDir + "pymol_images.py");

    var batFilename = receptorDir + "pymol_images.bat";
    var batData = File.ReadAllLines(@"c:\phd\modeller_scripts\pymol_images.bat");
    for (var i = 0; i <= 4; i++)
    {
        var numberedScript = "pymol_images_" + i + ".py";
        var numberedBat = Path.GetDirectoryName(batFilename) + @"\" + Path.GetFileNameWithoutExtension(batFilename) + "_" + i + Path.GetExtension(batFilename);
        File.WriteAllLines(
            numberedBat,
            batData.Select(a => a.Replace("pymol_images.py", numberedScript).Replace("pymol -e", "pymol")).ToList());
    }

    // Manual switch: flip to true to launch the batch file right after generating it.
    // NOTE(review): only the numbered variants (_0 .. _4) are written by this method, so
    // batFilename itself must already exist for the launch to work.
    var start = false;
    if (start)
    {
        using (var process = new Process { StartInfo = new ProcessStartInfo(batFilename) })
        {
            process.StartInfo.WorkingDirectory = Path.GetDirectoryName(batFilename);
            process.Start();
            process.WaitForExit();
        }

        Thread.Sleep(1000);
    }
}
/// <summary>
/// Entry point. For every five-character id listed in <c>c:\phd\search_mimicry_pdb_list.txt</c>
/// it loads the IBIS "PPI" records of that id, filters the ligands through
/// <c>FilterLigandHomology</c>, fills the on-disk quality and homology caches, groups each
/// record's contact residue numbers into runs, creates one <c>LigandInterface</c> per run and
/// assigns contact-cluster numbers to interfaces that share residues and main secondary
/// structure. All interfaces are saved with <c>LigandInterface.Save</c> at the end.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
/// <exception cref="InvalidDataException">An entry of the protein list is not exactly five characters long.</exception>
public static void Main(string[] args)
{
    var ligandInterfaceList = new List<LigandInterface>();
    var proteinList = File.ReadAllLines(@"c:\phd\search_mimicry_pdb_list.txt");
    if (proteinList.Any(a => a.Length != 5))
    {
        throw new InvalidDataException("Wrong pdb code length");
    }

    // Caches persisted between runs as space-separated lines: "id quality" and "id1 id2 homology".
    // Each line is split once; blank lines are ignored instead of failing on a missing column.
    var crystalToSeqQuality = new List<Tuple<string, decimal>>();
    var homology = new List<Tuple<string, string, decimal>>();

    if (File.Exists(@"c:\phd\pdb_quality.csv"))
    {
        crystalToSeqQuality = File.ReadAllLines(@"c:\phd\pdb_quality.csv")
            .Where(line => !string.IsNullOrWhiteSpace(line))
            .Select(line => line.Split(' '))
            .Select(fields => new Tuple<string, decimal>(fields[0], decimal.Parse(fields[1])))
            .ToList();
    }

    if (File.Exists(@"c:\phd\pdb_homology.csv"))
    {
        homology = File.ReadAllLines(@"c:\phd\pdb_homology.csv")
            .Where(line => !string.IsNullOrWhiteSpace(line))
            .Select(line => line.Split(' '))
            .Select(fields => new Tuple<string, string, decimal>(fields[0], fields[1], decimal.Parse(fields[2])))
            .ToList();
    }

    foreach (var mainReceptorPdbId in proteinList)
    {
        Debug.WriteLine(mainReceptorPdbId);

        var chain = mainReceptorPdbId[4];
        var ibisFile = @"c:\phd\ibis\" + mainReceptorPdbId.Substring(1, 2) + @"\" + mainReceptorPdbId.Substring(0, 4) + ".txt";

        var ibisRecordList = IbisData.Load(ibisFile);

        // LoadReceptor treats a null result from IbisData.Load as "no data"; do the same here.
        if (ibisRecordList == null)
        {
            continue;
        }

        ibisRecordList = ibisRecordList
            .Where(a => a.Interaction_type == "PPI")
            .Where(a => a.Query == mainReceptorPdbId && !a.PDB_Evidence.EndsWith(mainReceptorPdbId, StringComparison.Ordinal))
            // Evidence is read as "RRRRc_LLLLc" by fixed offsets below, so anything shorter than
            // 11 characters is skipped; receptor and ligand must also differ.
            .Where(a => a.PDB_Evidence.Length >= 11 && a.PDB_Evidence.Substring(0, 5) != a.PDB_Evidence.Substring(6, 5))
            .ToList();

        var ligandPdbIds = ibisRecordList.Select(a => a.PDB_Evidence.Substring(6, 5)).Distinct().ToList();
        Debug.WriteLine("B: " + string.Join(", ", ligandPdbIds));
        ligandPdbIds = FilterLigandHomology(ligandPdbIds, crystalToSeqQuality, homology);
        Debug.WriteLine("A: " + string.Join(", ", ligandPdbIds));

        ibisRecordList = ibisRecordList
            .Where(a => ligandPdbIds.Any(b => a.PDB_Evidence.EndsWith(b, StringComparison.Ordinal)))
            .ToList();

        var currentLigandInterfaceList = new List<LigandInterface>();

        foreach (var ibisRecord in ibisRecordList)
        {
            var ligandPdbId = ibisRecord.PDB_Evidence.Substring(6, 5);

            // Make sure the quality cache has an entry for this ligand; the file is rewritten
            // immediately after each new value is added.
            var ligandQuality = crystalToSeqQuality.FirstOrDefault(a => a.Item1 == ligandPdbId);
            if (ligandQuality == null)
            {
                ligandQuality = new Tuple<string, decimal>(ligandPdbId, PdbStructureQuality(ligandPdbId));
                crystalToSeqQuality.Add(ligandQuality);
                File.WriteAllLines(@"c:\phd\pdb_quality.csv", crystalToSeqQuality.Select(a => a.Item1 + " " + a.Item2).ToList());
            }

            var dsspFilename = ligandPdbId.Substring(0, 4).ToLowerInvariant() + ".dssp";
            var dsspFullFilename = @"c:\dssp\" + dsspFilename;

            // Manual switch: flip to true to fetch missing DSSP files over FTP.
            var downloadDssp = false;
            if (!File.Exists(dsspFullFilename) && downloadDssp)
            {
                var ftpDsspFile = @"ftp://ftp.cmbi.ru.nl/pub/molbio/data/dssp/" + dsspFilename;
                using (var client = new WebClient())
                {
                    try
                    {
                        client.DownloadFile(ftpDsspFile, dsspFullFilename);
                    }
                    catch (Exception ex)
                    {
                        // The download stays best-effort, but the failure is now visible in the debug output.
                        Debug.WriteLine("DSSP download failed for " + ftpDsspFile + ": " + ex.Message);
                    }
                }
            }

            // Make sure the homology cache has an entry for this ligand against the ligand of
            // every other record of the same receptor.
            foreach (var ibisRecord2 in ibisRecordList)
            {
                if (ibisRecord == ibisRecord2)
                {
                    continue;
                }

                var ligandPdbId2 = ibisRecord2.PDB_Evidence.Substring(6, 5);
                var homologyPct = homology.FirstOrDefault(a => a.Item1 == ligandPdbId && a.Item2 == ligandPdbId2);
                if (homologyPct == null)
                {
                    homologyPct = new Tuple<string, string, decimal>(ligandPdbId, ligandPdbId2, Homology(ligandPdbId, ligandPdbId2));
                    homology.Add(homologyPct);
                    File.WriteAllLines(@"c:\phd\pdb_homology.csv", homology.Select(a => a.Item1 + " " + a.Item2 + " " + a.Item3).ToList());
                }
            }

            // Residue tokens keep only their ASCII digits (so "45A" becomes 45); empty tokens from
            // repeated spaces and tokens without any digit are skipped instead of reaching int.Parse("").
            // NOTE(review): a leading '-' is dropped as well, so negative residue numbers lose their sign.
            var contacts = ibisRecord.PDB_Residue_No.Trim()
                .Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries)
                .Select(token => new string(token.Where(ch => ch >= '0' && ch <= '9').ToArray()))
                .Where(digits => digits.Length > 0)
                .Select(digits => int.Parse(digits))
                .ToList();

            // Greedy grouping in input order: a contact joins the first group that already holds a
            // residue number within 5 of it, otherwise it starts a new group.
            var groups = new List<List<int>>();
            foreach (var contact in contacts)
            {
                var nearGroup = groups.FirstOrDefault(a => a.Any(b => Math.Abs(contact - b) <= 5));
                if (nearGroup == null)
                {
                    groups.Add(new List<int> { contact });
                }
                else
                {
                    nearGroup.Add(contact);
                }
            }

            // Only groups of 5 to 20 contacts become interfaces.
            groups = groups.Where(a => a.Count >= 5 && a.Count <= 20).ToList();

            // NOTE(review): the DSSP file name is derived from the ligand's PDB code while the chain
            // passed in is the master receptor's chain character - confirm this pairing is intended.
            var secStructs = groups
                .Select(a => ProteinBioinformaticsSharedLibrary.Dssp.DsspStructureSequence.LoadDsspStructureSequence(dsspFullFilename, "" + chain, a.Min(), a.Max()))
                .ToList();

            // Most frequent element of each loaded sequence.
            var mainSecStructs = secStructs
                .Select(a => a.GroupBy(i => i).OrderByDescending(grp => grp.Count()).Select(grp => grp.Key).FirstOrDefault())
                .ToList();

            for (var groupIndex = 0; groupIndex < groups.Count; groupIndex++)
            {
                var interfaceResidues = groups[groupIndex];
                var r = new LigandInterface()
                {
                    MasterReceptor = mainReceptorPdbId,
                    Receptor = ibisRecord.PDB_Evidence.Substring(0, 5),
                    Ligand = ibisRecord.PDB_Evidence.Substring(6, 5),
                    ReceptorInterfaceSecStruct = secStructs[groupIndex],
                    ReceptorInterfaceMainSecStruct = mainSecStructs[groupIndex],
                    ReceptorInterfacePdbResSeqIndexes = interfaceResidues,
                    ReceptorInterfaceFirst = interfaceResidues.Min(),
                    ReceptorInterfaceLast = interfaceResidues.Max(),
                    InterfaceIndex = groupIndex + 1,
                };

                currentLigandInterfaceList.Add(r);
                ligandInterfaceList.Add(r);
            }
        }

        // Cluster this receptor's interfaces: start with one cluster per interface, then for each
        // residue number merge every cluster touching it and re-split the merged set by main
        // secondary structure.
        var resSeqIndexes = currentLigandInterfaceList
            .SelectMany(a => a.ReceptorInterfacePdbResSeqIndexes)
            .Distinct()
            .OrderBy(a => a)
            .ToList();

        var clusters = currentLigandInterfaceList.Select(a => new List<LigandInterface>() { a }).ToList();

        foreach (var resSeqIndex in resSeqIndexes)
        {
            var touching = clusters.Where(a => a.Any(b => b.ReceptorInterfacePdbResSeqIndexes.Contains(resSeqIndex))).ToList();
            clusters = clusters.Except(touching).ToList();

            var merged = touching.SelectMany(a => a).ToList();
            foreach (var sameSecStruct in merged.GroupBy(a => a.ReceptorInterfaceMainSecStruct))
            {
                clusters.Add(sameSecStruct.ToList());
            }
        }

        // Cluster numbers are 1-based positions in the final cluster list.
        for (var clusterIndex = 0; clusterIndex < clusters.Count; clusterIndex++)
        {
            var clusterNumber = clusterIndex + 1;
            clusters[clusterIndex].ForEach(a => a.ContactCluster = clusterNumber);
        }
    }

    LigandInterface.Save(ligandInterfaceList);
}