/// <summary>
/// Scans one chain of the singular amino-acid interaction container and returns the first atom whose
/// parsed residue sequence number equals <paramref name="residueSequenceIndex" />.
/// </summary>
/// <param name="singularAaToAaInteractions">Container whose chain is searched.</param>
/// <param name="chainIndex">Index of the chain inside the container's chain list.</param>
/// <param name="residueSequenceIndex">Residue sequence number to look for.</param>
/// <returns>
/// The first matching atom, or null when the container is reported as null/empty by
/// <c>ParameterValidation</c> or when no atom in the chain matches.
/// </returns>
/// <exception cref="ArgumentOutOfRangeException">
/// Thrown when <c>ParameterValidation</c> rejects <paramref name="chainIndex" /> or
/// <paramref name="residueSequenceIndex" />.
/// </exception>
public static ATOM_Record FindAtomInsideSingularInteractionsChain(ProteinChainListContainer singularAaToAaInteractions, int chainIndex, int residueSequenceIndex)
{
    if (ParameterValidation.IsProteinChainListContainerNullOrEmpty(singularAaToAaInteractions))
    {
        return null;
    }

    if (ParameterValidation.IsChainIndexInvalid(chainIndex))
    {
        throw new ArgumentOutOfRangeException(nameof(chainIndex));
    }

    if (ParameterValidation.IsResidueSequenceIndexInvalid(residueSequenceIndex, true))
    {
        throw new ArgumentOutOfRangeException(nameof(residueSequenceIndex));
    }

    var chainAtoms = singularAaToAaInteractions.ChainList[chainIndex].AtomList;

    int position = 0;
    while (position < chainAtoms.Count)
    {
        ATOM_Record candidate = chainAtoms[position];
        var candidateResidueSequence = ProteinDataBankFileOperations.NullableTryParseInt32(candidate.resSeq.FieldValue);

        // An unparseable residue number yields null, which never equals the requested index.
        if (candidateResidueSequence == residueSequenceIndex)
        {
            return candidate;
        }

        position++;
    }

    return null;
}
/// <summary>
/// Builds the motif-hit spreadsheet: a header row followed by one row per record, ranked by
/// (1) summed database hits, (2) summed database sequences and (3) summed times suggested,
/// each in descending order.
/// </summary>
/// <param name="motifHitSpreadsheetRecordList">Records to render; must not be null.</param>
/// <returns>The rows converted to a rectangular two-dimensional string array.</returns>
/// <exception cref="ArgumentNullException">Thrown when the record list is null.</exception>
public static string[,] MotifSpreadsheet(List<MotifHitSpreadsheetRecord> motifHitSpreadsheetRecordList)
{
    if (motifHitSpreadsheetRecordList == null)
    {
        throw new ArgumentNullException(nameof(motifHitSpreadsheetRecordList));
    }

    // Blank cells are parsed as "0"; any other text goes to the parser unchanged, so an
    // unparseable cell makes the whole sum for that sort key null.
    Func<string, int?> cellValue = cell =>
        ProteinDataBankFileOperations.NullableTryParseInt32(string.IsNullOrWhiteSpace(cell) ? "0" : cell);

    var rows = new List<string[]>();
    rows.Add(Header().ToStrings());

    var rankedRecords = motifHitSpreadsheetRecordList
        .OrderByDescending(r => cellValue(r.TotalDatabaseHitsPdb)
                                + cellValue(r.TotalDatabaseHitsUniProtKbSwissProt)
                                + cellValue(r.TotalDatabaseHitsUniProtKbTrEmbl))
        .ThenByDescending(r => cellValue(r.TotalDatabaseSequencesPdb)
                               + cellValue(r.TotalDatabaseSequencesUniProtKbSwissProt)
                               + cellValue(r.TotalDatabaseSequencesUniProtKbTrEmbl))
        .ThenByDescending(r => cellValue(r.TotalTimesSuggestedFwd)
                               + cellValue(r.TotalTimesSuggestedRev)
                               + cellValue(r.TotalTimesSuggestedMix));

    rows.AddRange(rankedRecords.Select(r => r.ToStrings()));

    return ConvertTypes.StringJagged2DArrayTo2DArray(rows.ToArray());
}
/// <summary>
/// Looks through the atoms of one chain of a PDB chain container for the first atom whose parsed
/// residue sequence number equals <paramref name="residueSequenceIndex" />.
/// </summary>
/// <param name="pdbFileChains">Chain container to search.</param>
/// <param name="chainIndex">Index of the chain inside the container's chain list.</param>
/// <param name="residueSequenceIndex">Residue sequence number to look for.</param>
/// <returns>
/// The first matching atom, or null when the container is reported as null/empty by
/// <c>ParameterValidation</c> or no atom matches.
/// </returns>
/// <exception cref="ArgumentOutOfRangeException">
/// Thrown when <c>ParameterValidation</c> rejects <paramref name="chainIndex" /> or
/// <paramref name="residueSequenceIndex" />.
/// </exception>
public static ATOM_Record FindAtomInsidePdbFileChain(ProteinChainListContainer pdbFileChains, int chainIndex, int residueSequenceIndex)
{
    if (ParameterValidation.IsProteinChainListContainerNullOrEmpty(pdbFileChains))
    {
        return null;
    }

    if (ParameterValidation.IsChainIndexInvalid(chainIndex))
    {
        throw new ArgumentOutOfRangeException(nameof(chainIndex));
    }

    if (ParameterValidation.IsResidueSequenceIndexInvalid(residueSequenceIndex, true))
    {
        throw new ArgumentOutOfRangeException(nameof(residueSequenceIndex));
    }

    foreach (ATOM_Record candidate in pdbFileChains.ChainList[chainIndex].AtomList)
    {
        // The parser returns null for text it cannot read; null never equals the requested index.
        bool isMatch = ProteinDataBankFileOperations.NullableTryParseInt32(candidate.resSeq.FieldValue) == residueSequenceIndex;
        if (isMatch)
        {
            return candidate;
        }
    }

    return null;
}
/// <summary>
/// Gets the lowest (minimum) and highest (maximum) residue sequence number, as written in the PDB
/// records, among the atoms that belong to a protein interface.
/// </summary>
/// <param name="proteinInterface">Cluster whose <c>AtomIndexList</c> holds indexes into the chain's atom list.</param>
/// <param name="singularAaToAaInteractions">Container holding the atom list of each chain.</param>
/// <param name="chainIndex">Index of the chain the interface belongs to.</param>
/// <returns>
/// The minimum and maximum over all atoms whose residue number could be parsed. When no atom has a
/// parseable residue number both values are 0.
/// </returns>
/// <exception cref="ArgumentNullException">
/// Thrown when <c>ParameterValidation</c> reports the cluster or the container as null/empty.
/// </exception>
/// <exception cref="ArgumentOutOfRangeException">Thrown when <c>ParameterValidation</c> rejects the chain index.</exception>
public static MinMax MinMaxResidueSequenceIndex(ClusteringFullResultListContainer.Chain.Stage.Cluster proteinInterface, ProteinChainListContainer singularAaToAaInteractions, int chainIndex)
{
    if (ParameterValidation.IsClusterNullOrEmpty(proteinInterface))
    {
        throw new ArgumentNullException(nameof(proteinInterface));
    }

    if (ParameterValidation.IsProteinChainListContainerNullOrEmpty(singularAaToAaInteractions))
    {
        throw new ArgumentNullException(nameof(singularAaToAaInteractions));
    }

    if (ParameterValidation.IsChainIndexInvalid(chainIndex))
    {
        throw new ArgumentOutOfRangeException(nameof(chainIndex));
    }

    int proteinInterfaceMin = 0;
    int proteinInterfaceMax = 0;

    // Seed min/max from the first atom that actually parses. Seeding on "first list position"
    // left both values anchored at 0 whenever that first atom had an unparseable residue number.
    bool seeded = false;

    for (int memberIndex = 0; memberIndex < proteinInterface.AtomIndexList.Count; memberIndex++)
    {
        int member = proteinInterface.AtomIndexList[memberIndex];
        ATOM_Record atom = singularAaToAaInteractions.ChainList[chainIndex].AtomList[member];

        var residueSequenceIndex = ProteinDataBankFileOperations.NullableTryParseInt32(atom.resSeq.FieldValue);
        if (residueSequenceIndex == null)
        {
            continue;
        }

        int value = residueSequenceIndex.Value;

        if (!seeded || value < proteinInterfaceMin)
        {
            proteinInterfaceMin = value;
        }

        if (!seeded || value > proteinInterfaceMax)
        {
            proteinInterfaceMax = value;
        }

        seeded = true;
    }

    return new MinMax(proteinInterfaceMin, proteinInterfaceMax);
}
/// <summary>
/// Loads the atomic chains of a PDB file and delegates to the chain-based <c>FindInteractions</c>
/// overload to find interactions between atoms.
/// </summary>
/// <param name="cancellationToken">Token forwarded to the chain-based overload.</param>
/// <param name="maxAtomInterationDistance">Distance limit forwarded to the chain-based overload.</param>
/// <param name="pdbFilename">The filename of the PDB file to parse for chemical interactions.</param>
/// <param name="pdbIdChainIdList">
/// Optional map from PDB id to chain ids. When it contains the id derived from
/// <paramref name="pdbFilename" />, those chain ids are passed to the chain loader.
/// </param>
/// <param name="breakWhenFirstInteractionFound">Forwarded to the chain-based overload.</param>
/// <param name="totalThreads">Forwarded to the chain-based overload.</param>
/// <param name="sort">Forwarded to the chain-based overload.</param>
/// <param name="requiredChains">Forwarded to the chain loader (twice) and to the chain-based overload.</param>
/// <returns>The atom pairs reported by the chain-based overload; an empty list when it returns null.</returns>
/// <exception cref="ArgumentOutOfRangeException">
/// Thrown when <c>ParameterValidation.IsLoadFilenameInvalid</c> rejects <paramref name="pdbFilename" />.
/// </exception>
public static List<AtomPair> FindInteractions(CancellationToken cancellationToken, decimal maxAtomInterationDistance /*= 8.0m*/, string pdbFilename, Dictionary<string, List<string>> pdbIdChainIdList, bool breakWhenFirstInteractionFound = false, int totalThreads = -1, bool sort = true, int requiredChains = -1)
{
    if (ParameterValidation.IsLoadFilenameInvalid(pdbFilename))
    {
        throw new ArgumentOutOfRangeException(nameof(pdbFilename));
    }

    string proteinId = ProteinDataBankFileOperations.PdbIdFromPdbFilename(pdbFilename);

    // The interaction cache is currently switched off; both cache branches below are inert.
    bool useCache = false;

    if (useCache && !string.IsNullOrWhiteSpace(proteinId))
    {
        var cachedInteractions = InteractionsCache.LoadPdbInteractionCache(proteinId, requiredChains);
        if (cachedInteractions != null)
        {
            return cachedInteractions;
        }
    }

    // Restrict loading to the chains listed for this protein, when such a listing exists.
    string[] chainIdList = null;
    List<string> listedChainIds;
    if (pdbIdChainIdList != null && proteinId != null && pdbIdChainIdList.TryGetValue(proteinId, out listedChainIds))
    {
        chainIdList = listedChainIds.ToArray();
    }

    ProteinChainListContainer proteinFileChains = ProteinDataBankFileOperations.PdbAtomicChains(pdbFilename, chainIdList, requiredChains, requiredChains, true);

    List<AtomPair> atomPairList = FindInteractions(cancellationToken, maxAtomInterationDistance, proteinId, pdbIdChainIdList, proteinFileChains, breakWhenFirstInteractionFound, totalThreads, sort, requiredChains);

    if (atomPairList != null)
    {
        return atomPairList;
    }

    // Only a null result is saved here; per the original note, a non-null result was already
    // saved by the other method.
    atomPairList = new List<AtomPair>();
    if (useCache)
    {
        InteractionsCache.SavePdbInteractionCache(proteinId, atomPairList, requiredChains);
    }

    return atomPairList;
}
/// <summary>
/// Computes the zero-based position of an atom's residue inside a protein interface, measured from
/// the interface's minimum residue sequence number.
/// </summary>
/// <param name="singularAaToAaInteractions">Container holding the atom list of each chain.</param>
/// <param name="proteinInterfacesClusteringResult">Clustering result holding the interface clusters per chain and stage.</param>
/// <param name="detectedBestStages">For each chain index, the stage index whose cluster list is used.</param>
/// <param name="chainIndex">Index of the chain that owns the interface.</param>
/// <param name="proteinInterfaceIndex">Index of the interface among the non-empty clusters of the selected stage.</param>
/// <param name="atomPositionToFind">Atom whose residue sequence number is located.</param>
/// <returns>
/// The atom's residue sequence number minus the interface minimum, or -1 when the atom's residue
/// sequence number cannot be parsed. The value is not clamped to the interface range.
/// </returns>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="proteinInterfacesClusteringResult" />, <paramref name="detectedBestStages" />
/// or <paramref name="atomPositionToFind" /> is null.
/// </exception>
/// <exception cref="ArgumentOutOfRangeException">
/// Thrown when <paramref name="proteinInterfaceIndex" /> is negative or not smaller than the number of
/// non-empty interfaces.
/// </exception>
public static int AtomIndexPositionInProteinInterface(
    ProteinChainListContainer singularAaToAaInteractions,
    ClusteringFullResultListContainer proteinInterfacesClusteringResult,
    int[] detectedBestStages,
    int chainIndex,
    int proteinInterfaceIndex,
    ATOM_Record atomPositionToFind)
{
    if (proteinInterfacesClusteringResult == null)
    {
        throw new ArgumentNullException(nameof(proteinInterfacesClusteringResult));
    }

    if (detectedBestStages == null)
    {
        throw new ArgumentNullException(nameof(detectedBestStages));
    }

    if (atomPositionToFind == null)
    {
        throw new ArgumentNullException(nameof(atomPositionToFind));
    }

    List<ClusteringFullResultListContainer.Chain.Stage.Cluster> proteinInterfaceList =
        proteinInterfacesClusteringResult.ChainList[chainIndex].StageList[detectedBestStages[chainIndex]].ClusterList;

    // Interface indexes count only clusters that actually contain atoms.
    List<ClusteringFullResultListContainer.Chain.Stage.Cluster> nonEmptyProteinInterfaceList =
        proteinInterfaceList.Where(a => a != null && a.AtomIndexList != null && a.AtomIndexList.Count > 0).ToList();

    if (proteinInterfaceIndex < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(proteinInterfaceIndex), proteinInterfaceIndex, "proteinInterfaceIndex was less than zero.");
    }

    if (proteinInterfaceIndex > nonEmptyProteinInterfaceList.Count - 1)
    {
        throw new ArgumentOutOfRangeException(nameof(proteinInterfaceIndex), proteinInterfaceIndex, "proteinInterfaceIndex was greater than the number of proteinInterfaces found.");
    }

    // Every entry of the filtered list has at least one atom, so no empty-cluster check is needed here.
    ClusteringFullResultListContainer.Chain.Stage.Cluster proteinInterface = nonEmptyProteinInterfaceList[proteinInterfaceIndex];

    MinMax minMaxResidueSequenceIndex = MinMaxResidueSequenceIndex(proteinInterface, singularAaToAaInteractions, chainIndex);

    var residueSequenceToFind = ProteinDataBankFileOperations.NullableTryParseInt32(atomPositionToFind.resSeq.FieldValue);
    if (residueSequenceToFind == null)
    {
        return -1;
    }

    // Zero-based offset from the first residue of the interface.
    return residueSequenceToFind.Value - minMaxResidueSequenceIndex.Min;
}
/// <summary>
/// Finds the interactions of every listed PDB file on background tasks and returns the formatted
/// interaction text of each file, in the order of <paramref name="pdbFilesList" />.
/// </summary>
/// <param name="cancellationToken">Token checked at the start of each task and passed to <c>Task.Run</c>.</param>
/// <param name="maxAtomInterationDistance">Distance limit forwarded to <c>SearchInteractions.FindInteractions</c>.</param>
/// <param name="pdbFilesList">PDB filenames to process; must not be null.</param>
/// <param name="pdbIdChainIdList">Forwarded to <c>SearchInteractions.FindInteractions</c>.</param>
/// <param name="progressActionSet">Progress callbacks notified on start, per file and on finish.</param>
/// <param name="outputToGui">When true, each file's formatted text is also reported through <paramref name="progressActionSet" />.</param>
/// <returns>
/// The formatted text of every task that ran to completion with a non-null result. Tasks that were
/// cancelled, faulted or returned null contribute nothing.
/// </returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="pdbFilesList" /> is null.</exception>
public static string[] MakeInteractionsOutput(CancellationToken cancellationToken, decimal maxAtomInterationDistance, string[] pdbFilesList, Dictionary<string, List<string>> pdbIdChainIdList, ProgressActionSet progressActionSet, bool outputToGui)
{
    if (pdbFilesList == null)
    {
        throw new ArgumentNullException(nameof(pdbFilesList));
    }

    // Upper bound on tasks that may be queued or running at the same time.
    int maxPendingTasks = Environment.ProcessorCount * 10;

    var interactionTasks = new List<Task<string>>();

    ProgressActionSet.StartAction(pdbFilesList.Length, progressActionSet);

    foreach (string pdbFilename in pdbFilesList)
    {
        string currentPdbFilename = pdbFilename;

        // Throttle: wait only on tasks that are still pending. Waiting on the full list would
        // return immediately once any task had finished, turning this loop into a busy spin.
        while (true)
        {
            Task[] pendingTasks = interactionTasks.Where(t => !t.IsCompleted).ToArray<Task>();
            if (pendingTasks.Length < maxPendingTasks)
            {
                break;
            }

            Task.WaitAny(pendingTasks);
        }

        var interactionTask = Task.Run(() =>
        {
            if (cancellationToken.IsCancellationRequested)
            {
                return null;
            }

            List<AtomPair> interactionsList = SearchInteractions.FindInteractions(cancellationToken, maxAtomInterationDistance, currentPdbFilename, pdbIdChainIdList);

            string pdbId = ProteinDataBankFileOperations.PdbIdFromPdbFilename(currentPdbFilename);
            if (string.IsNullOrEmpty(pdbId))
            {
                throw new ArgumentNullException(nameof(pdbFilesList), "The file " + currentPdbFilename + " has an invalid name.");
            }

            if (interactionsList == null)
            {
                interactionsList = new List<AtomPair>();
            }

            interactionsList = interactionsList.OrderBy(o => o.Distance).ToList();

            var interactionsString = FormatInteractionOutput(pdbId, interactionsList);

            if (outputToGui)
            {
                ProgressActionSet.Report(interactionsString, progressActionSet);
            }

            ProgressActionSet.ProgressAction(1, progressActionSet);

            return interactionsString;
        }, cancellationToken);

        interactionTasks.Add(interactionTask);
    }

    // NOTE(review): a task that faults or is cancelled while being waited on makes WaitAll throw
    // AggregateException, whereas one that already finished that way is silently skipped below.
    Task.WaitAll(interactionTasks.Where(t => !t.IsCompleted).ToArray<Task>());

    // The list is already in submission order. Task.Id is assigned on demand and does not
    // represent creation order, so it is not used as a sort key.
    var interactionsStringsList = interactionTasks
        .Where(t => t.IsCompleted && !t.IsCanceled && !t.IsFaulted && t.Result != null)
        .Select(t => t.Result)
        .ToArray();

    ProgressActionSet.FinishAction(true, progressActionSet);

    return interactionsStringsList;
}
/// <summary>
/// Builds the spreadsheet for proprotein-interface records: a header row followed by one row per
/// record, ordered by the parsed <c>TotalFound</c> value, highest first.
/// </summary>
/// <param name="proproteinInterfaceSpreadsheetRecordList">Records to render; must not be null.</param>
/// <returns>The rows converted to a rectangular two-dimensional string array.</returns>
/// <exception cref="ArgumentNullException">Thrown when the record list is null.</exception>
public static string[,] MotifSpreadsheet(List<ProproteinInterfaceSpreadsheetRecord> proproteinInterfaceSpreadsheetRecordList)
{
    if (proproteinInterfaceSpreadsheetRecordList == null)
    {
        throw new ArgumentNullException(nameof(proproteinInterfaceSpreadsheetRecordList));
    }

    var rows = new List<string[]> { Header().ToStrings() };

    var rankedRecords = proproteinInterfaceSpreadsheetRecordList
        .OrderByDescending(entry => ProteinDataBankFileOperations.NullableTryParseInt32(entry.TotalFound));

    foreach (var entry in rankedRecords)
    {
        rows.Add(entry.ToStrings());
    }

    return ConvertTypes.StringJagged2DArrayTo2DArray(rows.ToArray());
}
/// <summary>
/// Creates one spreadsheet record per motif. For each motif the PDB, UniProtKB/Swiss-Prot and
/// UniProtKB/TrEMBL databases are queried on three parallel tasks, and the reported match and
/// sequence counts are stored next to the motif's suggestion counters.
/// </summary>
/// <param name="motifDistinctWithCount">Map from motif text to its suggestion counters; must not be null.</param>
/// <returns>
/// One record per dictionary entry. A database count is -1 when the response was missing or its
/// value could not be parsed; an overall count is -1 only when all three of its parts are -1.
/// </returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="motifDistinctWithCount" /> is null.</exception>
public static List<MotifHitSpreadsheetRecord> MotifRecordList(Dictionary<string, MotifCounter> motifDistinctWithCount)
{
    if (motifDistinctWithCount == null)
    {
        throw new ArgumentNullException(nameof(motifDistinctWithCount));
    }

    // Adds the non-negative parts; stays at the -1 marker when every part is exactly -1.
    Func<int, int, int, int> combineCounts = (first, second, third) =>
    {
        if (first == -1 && second == -1 && third == -1)
        {
            return -1;
        }

        return (first > -1 ? first : 0) + (second > -1 ? second : 0) + (third > -1 ? third : 0);
    };

    var records = new List<MotifHitSpreadsheetRecord>();

    foreach (var entry in motifDistinctWithCount)
    {
        var motif = entry.Key;
        var motifCounter = entry.Value;

        ProproteinInterfaceMatchSet pdbResponse = null;
        ProproteinInterfaceMatchSet swissProtResponse = null;
        ProproteinInterfaceMatchSet trEmblResponse = null;

        var lookups = new List<Task>
        {
            Task.Run(() => pdbResponse = ProproteinInterfaceServiceClient.LoadProproteinInterfaceResponse(new ScanProproteinInterfaceParameters { sig = motif, db = ScanProproteinInterfaceParameters.TargetProteinDatabases.ProteinDataBank })),
            Task.Run(() => swissProtResponse = ProproteinInterfaceServiceClient.LoadProproteinInterfaceResponse(new ScanProproteinInterfaceParameters { sig = motif, db = ScanProproteinInterfaceParameters.TargetProteinDatabases.UniProtKbSwissProt })),
            Task.Run(() => trEmblResponse = ProproteinInterfaceServiceClient.LoadProproteinInterfaceResponse(new ScanProproteinInterfaceParameters { sig = motif, db = ScanProproteinInterfaceParameters.TargetProteinDatabases.UniProtKbTrEmbl })),
        };

        // Only lookups that have not finished yet are waited on.
        Task.WaitAll(lookups.Where(t => !t.IsCompleted && !t.IsCanceled && !t.IsFaulted).ToArray());

        int totalPdbMatch = (pdbResponse != null ? ProteinDataBankFileOperations.NullableTryParseInt32(pdbResponse.NMatch) : null) ?? -1;
        int totalSpMatch = (swissProtResponse != null ? ProteinDataBankFileOperations.NullableTryParseInt32(swissProtResponse.NMatch) : null) ?? -1;
        int totalTrMatch = (trEmblResponse != null ? ProteinDataBankFileOperations.NullableTryParseInt32(trEmblResponse.NMatch) : null) ?? -1;
        int totalMatchOverall = combineCounts(totalPdbMatch, totalSpMatch, totalTrMatch);

        int totalPdbSeq = (pdbResponse != null ? ProteinDataBankFileOperations.NullableTryParseInt32(pdbResponse.NSeq) : null) ?? -1;
        int totalSpSeq = (swissProtResponse != null ? ProteinDataBankFileOperations.NullableTryParseInt32(swissProtResponse.NSeq) : null) ?? -1;
        int totalTrSeq = (trEmblResponse != null ? ProteinDataBankFileOperations.NullableTryParseInt32(trEmblResponse.NSeq) : null) ?? -1;
        int totalSeqOverall = combineCounts(totalPdbSeq, totalSpSeq, totalTrSeq);

        var totalTimesOverall = (motifCounter.TotalFwd + motifCounter.TotalRev + motifCounter.TotalMix);

        var record = new MotifHitSpreadsheetRecord();
        record.Motif = entry.Key;
        record.TotalTimesSuggestedFwd = "" + motifCounter.TotalFwd;
        record.TotalTimesSuggestedRev = "" + motifCounter.TotalRev;
        record.TotalTimesSuggestedMix = "" + motifCounter.TotalMix;
        record.TotalTimesSuggestedOverall = "" + totalTimesOverall;
        record.TotalDatabaseHitsPdb = "" + totalPdbMatch;
        record.TotalDatabaseSequencesPdb = "" + totalPdbSeq;
        record.TotalDatabaseHitsUniProtKbSwissProt = "" + totalSpMatch;
        record.TotalDatabaseSequencesUniProtKbSwissProt = "" + totalSpSeq;
        record.TotalDatabaseHitsUniProtKbTrEmbl = "" + totalTrMatch;
        record.TotalDatabaseSequencesUniProtKbTrEmbl = "" + totalTrSeq;
        record.TotalDatabaseHitsOverall = "" + totalMatchOverall;
        record.TotalSequencesOverall = "" + totalSeqOverall;

        records.Add(record);
    }

    return records;
}
/// <summary>
/// Reads the DSSP file that accompanies a PDB file (<c>pdbFilename + ".dssp"</c>) and returns the
/// secondary-structure codes for a residue range as a string with one character per residue position.
/// </summary>
/// <param name="pdbFilename">PDB filename; the DSSP file is expected at this path with ".dssp" appended.</param>
/// <param name="chainId">Chain to read, or null to use the records of every chain.</param>
/// <param name="startResidueSequenceIndex">First residue number of the range; -1 selects the lowest parseable residue number.</param>
/// <param name="endResidueSequenceIndex">Last residue number of the range; -1 selects the highest parseable residue number.</param>
/// <param name="reversedSequence">When true the resulting characters are reversed.</param>
/// <returns>
/// A string in which each position holds the first character of the record's secondary-structure
/// field, or '_' where no record supplied one. An empty string is returned when the filename is
/// blank, the DSSP file does not exist or loads as null, the requested chain has no records, or a
/// default bound is requested and no record has a parseable residue number.
/// </returns>
public static string ProteinInterfaceSecondaryStructure(string pdbFilename, string chainId = null, int startResidueSequenceIndex = -1, int endResidueSequenceIndex = -1, bool reversedSequence = false)
{
    if (string.IsNullOrWhiteSpace(pdbFilename))
    {
        return "";
    }

    var dsspFilename = pdbFilename + ".dssp";
    if (!File.Exists(dsspFilename))
    {
        return "";
    }

    var secondaryStructure = DsspFormatFile.LoadDsspFile(dsspFilename);
    if (secondaryStructure == null)
    {
        return "";
    }

    if (chainId != null && secondaryStructure.FirstOrDefault(a => a.FieldChain.FieldValue == chainId) == null)
    {
        return "";
    }

    // Keep only records that name a chain and carry a parseable residue number, then sort them
    // (the DSSP specification says the record order may not be correct).
    var usableRecords = secondaryStructure
        .Where(a => !string.IsNullOrWhiteSpace(a.FieldChain.FieldValue) && !string.IsNullOrWhiteSpace(a.FieldPdbResidueSequenceIndex.FieldValue))
        .Where(a => chainId == null || a.FieldChain.FieldValue == chainId)
        .Where(a => ProteinDataBankFileOperations.NullableTryParseInt32(a.FieldPdbResidueSequenceIndex.FieldValue) != null)
        .OrderBy(a => a.FieldChain.FieldValue)
        .ThenBy(a => ProteinDataBankFileOperations.NullableTryParseInt32(a.FieldPdbResidueSequenceIndex.FieldValue))
        .ToList();

    // Default bounds come from the usable records only, so blank or non-numeric residue fields
    // can no longer raise FormatException, and an empty selection can no longer make Min/Max throw.
    if (startResidueSequenceIndex == -1 || endResidueSequenceIndex == -1)
    {
        if (usableRecords.Count == 0)
        {
            return "";
        }

        if (startResidueSequenceIndex == -1)
        {
            startResidueSequenceIndex = usableRecords.Min(a => ProteinDataBankFileOperations.NullableTryParseInt32(a.FieldPdbResidueSequenceIndex.FieldValue).Value);
        }

        if (endResidueSequenceIndex == -1)
        {
            endResidueSequenceIndex = usableRecords.Max(a => ProteinDataBankFileOperations.NullableTryParseInt32(a.FieldPdbResidueSequenceIndex.FieldValue).Value);
        }
    }

    var proteinInterfaceLen = ProteinInterfaceDetection.CalculateProteinInterfaceLength(startResidueSequenceIndex, endResidueSequenceIndex);

    char[] result = new char[proteinInterfaceLen];
    for (int index = 0; index < result.Length; index++)
    {
        result[index] = '_';
    }

    foreach (var record in usableRecords)
    {
        int resSeq = ProteinDataBankFileOperations.NullableTryParseInt32(record.FieldPdbResidueSequenceIndex.FieldValue).Value;
        if (resSeq < startResidueSequenceIndex || resSeq > endResidueSequenceIndex)
        {
            continue;
        }

        string structureCode = record.FieldSecondaryStructure.FieldValue;
        if (string.IsNullOrEmpty(structureCode))
        {
            continue;
        }

        result[resSeq - startResidueSequenceIndex] = structureCode[0];
    }

    if (reversedSequence)
    {
        Array.Reverse(result);
    }

    return new string(result);
}
/// <summary>
/// Sorts the atom-to-atom interactions of a structure into two lists: interactions whose two atoms
/// both belong to detected protein interfaces of two different chains, and all remaining
/// interactions. Every pair of chains (A before B) is examined.
/// </summary>
/// <param name="cancellationToken">Token forwarded to <c>SearchInteractions.FindInteractions</c>.</param>
/// <param name="maxAtomInterationDistance">Distance limit forwarded to <c>SearchInteractions.FindInteractions</c>.</param>
/// <param name="pdbFilename">PDB file; it must exist. Used to derive the protein id and, when no chains are supplied, to load them.</param>
/// <param name="pdbIdChainIdList">Forwarded to <c>SearchInteractions.FindInteractions</c>.</param>
/// <param name="pdbFileChains">Optional pre-loaded chains; when null or empty the interactions are found from the file instead.</param>
/// <param name="chainInteractingAtomLists">Per-chain atom lists that the interface atom indexes refer to.</param>
/// <param name="fullClusteringResult">Validated only; not otherwise read by this method.</param>
/// <param name="proteinInterfacesClusteringResult">Clustering result that holds the interface clusters per chain and stage.</param>
/// <param name="detectedFinalStageIndexes">For each chain index, the stage index whose cluster list is used.</param>
/// <returns>
/// A container with both lists, each sorted by chain, interface and residue number of atom 1 and
/// then of atom 2, with adjacent duplicates removed. Interface ids count only non-empty interfaces
/// of a chain; interactions outside interfaces carry the interface id -1.
/// </returns>
/// <exception cref="ArgumentOutOfRangeException">Thrown when the filename is blank or a required argument is reported as null/empty by <c>ParameterValidation</c>.</exception>
/// <exception cref="FileNotFoundException">Thrown when <paramref name="pdbFilename" /> does not exist.</exception>
public static InteractionBetweenProteinInterfacesListContainer FindInteractionsBetweenAnyProteinInterfaces(
    CancellationToken cancellationToken,
    decimal maxAtomInterationDistance,
    string pdbFilename,
    Dictionary<string, List<string>> pdbIdChainIdList,
    ProteinChainListContainer pdbFileChains,
    ProteinChainListContainer chainInteractingAtomLists,
    ClusteringFullResultListContainer fullClusteringResult,
    ClusteringFullResultListContainer proteinInterfacesClusteringResult,
    int[] detectedFinalStageIndexes)
{
    if (string.IsNullOrWhiteSpace(pdbFilename))
    {
        throw new ArgumentOutOfRangeException(nameof(pdbFilename));
    }

    if (!File.Exists(pdbFilename))
    {
        throw new FileNotFoundException("File not found", pdbFilename);
    }

    if (ParameterValidation.IsProteinChainListContainerNullOrEmpty(chainInteractingAtomLists))
    {
        throw new ArgumentOutOfRangeException(nameof(chainInteractingAtomLists));
    }

    if (ParameterValidation.IsClusteringFullResultListContainerNullOrEmpty(fullClusteringResult))
    {
        throw new ArgumentOutOfRangeException(nameof(fullClusteringResult));
    }

    if (ParameterValidation.IsClusteringFullResultListContainerNullOrEmpty(proteinInterfacesClusteringResult))
    {
        throw new ArgumentOutOfRangeException(nameof(proteinInterfacesClusteringResult));
    }

    if (ParameterValidation.IsIntArrayNullOrEmpty(detectedFinalStageIndexes))
    {
        throw new ArgumentOutOfRangeException(nameof(detectedFinalStageIndexes));
    }

    string proteinId = ProteinDataBankFileOperations.PdbIdFromPdbFilename(pdbFilename);

    var interactionBetweenProteinInterfacesListContainer = new InteractionBetweenProteinInterfacesListContainer();

    // Use the supplied chains when there are any; otherwise let the search load them from the file.
    List<AtomPair> interactionList;
    if (pdbFileChains != null && pdbFileChains.ChainList != null && pdbFileChains.ChainList.Count > 0)
    {
        interactionList = SearchInteractions.FindInteractions(cancellationToken, maxAtomInterationDistance, proteinId, pdbIdChainIdList, pdbFileChains);
    }
    else
    {
        interactionList = SearchInteractions.FindInteractions(cancellationToken, maxAtomInterationDistance, pdbFilename, pdbIdChainIdList);
    }

    // The chain-based search may hand back null (the filename overload guards against exactly
    // that); treat it as "no interactions" instead of failing on the dereference below.
    if (interactionList == null)
    {
        interactionList = new List<AtomPair>();
    }

    // Marks, per interaction, whether it was matched to a pair of interface atoms.
    var interactionInsideProteinInterfaceArray = new bool[interactionList.Count];

    for (int chainIndexA = 0; chainIndexA < proteinInterfacesClusteringResult.ChainList.Count; chainIndexA++)
    {
        // Each pair of chains is visited once, with A before B.
        for (int chainIndexB = chainIndexA + 1; chainIndexB < proteinInterfacesClusteringResult.ChainList.Count; chainIndexB++)
        {
            List<ClusteringFullResultListContainer.Chain.Stage.Cluster> proteinInterfaceListA = proteinInterfacesClusteringResult.ChainList[chainIndexA].StageList[detectedFinalStageIndexes[chainIndexA]].ClusterList;
            List<ClusteringFullResultListContainer.Chain.Stage.Cluster> proteinInterfaceListB = proteinInterfacesClusteringResult.ChainList[chainIndexB].StageList[detectedFinalStageIndexes[chainIndexB]].ClusterList;

            // "Real" indexes count only interfaces that contain atoms.
            int realProteinInterfaceIndexA = -1;

            for (int proteinInterfaceIndexA = 0; proteinInterfaceIndexA < proteinInterfaceListA.Count; proteinInterfaceIndexA++)
            {
                int realProteinInterfaceIndexB = -1;

                List<int> proteinInterfaceMemberIndexListA = proteinInterfaceListA[proteinInterfaceIndexA].AtomIndexList;
                List<ATOM_Record> proteinInterfaceAtomListA = proteinInterfaceMemberIndexListA
                    .Select(proteinInterfaceMemberIndexA => chainInteractingAtomLists.ChainList[chainIndexA].AtomList[proteinInterfaceMemberIndexA])
                    .ToList();
                proteinInterfaceAtomListA = proteinInterfaceAtomListA.OrderBy(a => ProteinDataBankFileOperations.NullableTryParseInt32(a.resSeq.FieldValue)).ToList();

                if (proteinInterfaceAtomListA.Count == 0)
                {
                    continue;
                }

                realProteinInterfaceIndexA++;

                for (int proteinInterfaceIndexB = 0; proteinInterfaceIndexB < proteinInterfaceListB.Count; proteinInterfaceIndexB++)
                {
                    List<int> proteinInterfaceMemberIndexListB = proteinInterfaceListB[proteinInterfaceIndexB].AtomIndexList;
                    List<ATOM_Record> proteinInterfaceAtomListB = proteinInterfaceMemberIndexListB
                        .Select(proteinInterfaceMemberIndexB => chainInteractingAtomLists.ChainList[chainIndexB].AtomList[proteinInterfaceMemberIndexB])
                        .ToList();
                    proteinInterfaceAtomListB = proteinInterfaceAtomListB.OrderBy(b => ProteinDataBankFileOperations.NullableTryParseInt32(b.resSeq.FieldValue)).ToList();

                    if (proteinInterfaceAtomListB.Count == 0)
                    {
                        continue;
                    }

                    realProteinInterfaceIndexB++;

                    for (int proteinInterfaceAtomListIndexA = 0; proteinInterfaceAtomListIndexA < proteinInterfaceAtomListA.Count; proteinInterfaceAtomListIndexA++)
                    {
                        ATOM_Record atomA = proteinInterfaceAtomListA[proteinInterfaceAtomListIndexA];

                        for (int proteinInterfaceAtomListIndexB = 0; proteinInterfaceAtomListIndexB < proteinInterfaceAtomListB.Count; proteinInterfaceAtomListIndexB++)
                        {
                            ATOM_Record atomB = proteinInterfaceAtomListB[proteinInterfaceAtomListIndexB];

                            for (int interactionIndex = 0; interactionIndex < interactionList.Count; interactionIndex++)
                            {
                                AtomPair interaction = interactionList[interactionIndex];

                                // The pair matches regardless of which side each atom is stored on.
                                bool matchesPair = (interaction.Atom1 == atomA && interaction.Atom2 == atomB) || (interaction.Atom1 == atomB && interaction.Atom2 == atomA);
                                if (!matchesPair)
                                {
                                    continue;
                                }

                                interactionInsideProteinInterfaceArray[interactionIndex] = true;

                                var interactionBetweenProteinInterfaces = new InteractionBetweenProteinInterfaces();
                                interactionBetweenProteinInterfacesListContainer.InteractionBetweenProteinInterfacesList.Add(interactionBetweenProteinInterfaces);

                                interactionBetweenProteinInterfaces.Atom1.Atom = atomA;
                                interactionBetweenProteinInterfaces.Atom1.FullProteinInterfaceId.ProteinId = proteinId;
                                interactionBetweenProteinInterfaces.Atom1.FullProteinInterfaceId.ChainId = chainIndexA;
                                interactionBetweenProteinInterfaces.Atom1.FullProteinInterfaceId.ProteinInterfaceId = realProteinInterfaceIndexA;

                                interactionBetweenProteinInterfaces.Atom2.Atom = atomB;
                                interactionBetweenProteinInterfaces.Atom2.FullProteinInterfaceId.ProteinId = proteinId;
                                interactionBetweenProteinInterfaces.Atom2.FullProteinInterfaceId.ChainId = chainIndexB;
                                interactionBetweenProteinInterfaces.Atom2.FullProteinInterfaceId.ProteinInterfaceId = realProteinInterfaceIndexB;
                            }
                        }
                    }
                }
            }
        }
    }

    // Every interaction that matched no interface pair is recorded with interface id -1.
    for (int interactionIndex = 0; interactionIndex < interactionInsideProteinInterfaceArray.Length; interactionIndex++)
    {
        if (interactionInsideProteinInterfaceArray[interactionIndex])
        {
            continue;
        }

        var interactionBetweenNonProteinInterfaces = new InteractionBetweenProteinInterfaces();
        interactionBetweenProteinInterfacesListContainer.InteractionBetweenNonProteinInterfacesList.Add(interactionBetweenNonProteinInterfaces);

        interactionBetweenNonProteinInterfaces.Atom1.Atom = interactionList[interactionIndex].Atom1;
        interactionBetweenNonProteinInterfaces.Atom1.FullProteinInterfaceId.ProteinId = proteinId;
        interactionBetweenNonProteinInterfaces.Atom1.FullProteinInterfaceId.ChainId = interactionList[interactionIndex].Atom1FullProteinInterfaceId.ChainId;
        interactionBetweenNonProteinInterfaces.Atom1.FullProteinInterfaceId.ProteinInterfaceId = -1;

        interactionBetweenNonProteinInterfaces.Atom2.Atom = interactionList[interactionIndex].Atom2;
        interactionBetweenNonProteinInterfaces.Atom2.FullProteinInterfaceId.ProteinId = proteinId;
        interactionBetweenNonProteinInterfaces.Atom2.FullProteinInterfaceId.ChainId = interactionList[interactionIndex].Atom2FullProteinInterfaceId.ChainId;
        interactionBetweenNonProteinInterfaces.Atom2.FullProteinInterfaceId.ProteinInterfaceId = -1;
    }

    // Ensure sorted order, then drop duplicates (sorting places duplicates next to each other).
    interactionBetweenProteinInterfacesListContainer.InteractionBetweenProteinInterfacesList =
        SortInteractionsByInterfaceAndResidue(interactionBetweenProteinInterfacesListContainer.InteractionBetweenProteinInterfacesList);
    interactionBetweenProteinInterfacesListContainer.InteractionBetweenNonProteinInterfacesList =
        SortInteractionsByInterfaceAndResidue(interactionBetweenProteinInterfacesListContainer.InteractionBetweenNonProteinInterfacesList);

    RemoveAdjacentDuplicateInteractions(interactionBetweenProteinInterfacesListContainer.InteractionBetweenProteinInterfacesList);
    RemoveAdjacentDuplicateInteractions(interactionBetweenProteinInterfacesListContainer.InteractionBetweenNonProteinInterfacesList);

    return interactionBetweenProteinInterfacesListContainer;
}

/// <summary>
/// Returns the interactions ordered by chain id, interface id and parsed residue number of atom 1,
/// then by the same three keys of atom 2.
/// </summary>
private static List<InteractionBetweenProteinInterfaces> SortInteractionsByInterfaceAndResidue(IEnumerable<InteractionBetweenProteinInterfaces> interactions)
{
    return interactions
        .OrderBy(a => a.Atom1.FullProteinInterfaceId.ChainId)
        .ThenBy(a => a.Atom1.FullProteinInterfaceId.ProteinInterfaceId)
        .ThenBy(a => ProteinDataBankFileOperations.NullableTryParseInt32(a.Atom1.Atom.resSeq.FieldValue))
        .ThenBy(a => a.Atom2.FullProteinInterfaceId.ChainId)
        .ThenBy(a => a.Atom2.FullProteinInterfaceId.ProteinInterfaceId)
        .ThenBy(a => ProteinDataBankFileOperations.NullableTryParseInt32(a.Atom2.Atom.resSeq.FieldValue))
        .ToList();
}

/// <summary>
/// Walks the list from the end and removes the earlier entry of every adjacent pair whose protein
/// id, chain id, interface id and atom are equal on both sides. Pairs containing a null entry are skipped.
/// </summary>
private static void RemoveAdjacentDuplicateInteractions(IList<InteractionBetweenProteinInterfaces> interactions)
{
    for (int index = interactions.Count - 1; index > 0; index--)
    {
        InteractionBetweenProteinInterfaces previous = interactions[index - 1];
        InteractionBetweenProteinInterfaces current = interactions[index];

        if (previous == null || current == null)
        {
            continue;
        }

        bool isDuplicate =
            current.Atom1.FullProteinInterfaceId.ProteinId == previous.Atom1.FullProteinInterfaceId.ProteinId &&
            current.Atom1.FullProteinInterfaceId.ChainId == previous.Atom1.FullProteinInterfaceId.ChainId &&
            current.Atom1.FullProteinInterfaceId.ProteinInterfaceId == previous.Atom1.FullProteinInterfaceId.ProteinInterfaceId &&
            current.Atom1.Atom == previous.Atom1.Atom &&
            current.Atom2.FullProteinInterfaceId.ProteinId == previous.Atom2.FullProteinInterfaceId.ProteinId &&
            current.Atom2.FullProteinInterfaceId.ChainId == previous.Atom2.FullProteinInterfaceId.ChainId &&
            current.Atom2.FullProteinInterfaceId.ProteinInterfaceId == previous.Atom2.FullProteinInterfaceId.ProteinInterfaceId &&
            current.Atom2.Atom == previous.Atom2.Atom;

        if (isDuplicate)
        {
            interactions.RemoveAt(index - 1);
        }
    }
}
/// <summary>
/// Builds sequence and position data for every non-empty proteinInterface of every chain.
/// The detected proteinInterfaces may be missing atoms or residues which lie inside the proteinInterface but were
/// not directly interacting, so every residue sequence index between the proteinInterface's minimum and maximum is
/// looked up and written to the "all", "interactions all", "inside proteinInterfaces only",
/// "outside proteinInterfaces only" and "none" sequences. A sequence that a residue does not belong to receives a
/// placeholder ("_" for one-letter sequences, "___" for three-letter sequences) so all sequences stay aligned.
/// </summary>
/// <param name="pdbFilename">Path of the PDB file. Must not be blank and must exist.</param>
/// <param name="pdbIdChainIdList">Optional map from protein id to chain ids; only used when the PDB file has to be re-read to find a residue missing from <paramref name="pdbFileChains"/>. May be null.</param>
/// <param name="pdbFileChains">Chains that are searched first for each residue sequence index.</param>
/// <param name="singularAaToAaInteractions">Interaction atoms per chain, used to find the residue range of each proteinInterface.</param>
/// <param name="proteinInterfacesClusteringResult">Clustering result whose clusters (at the selected stage) are the proteinInterfaces.</param>
/// <param name="detectedBestStages">For each chain index, the index of the stage whose cluster list is used.</param>
/// <param name="interactionBetweenProteinInterfacesContainer">Interactions used to classify each residue.</param>
/// <returns>One entry per non-empty proteinInterface, in chain order and then proteinInterface order.</returns>
/// <exception cref="ArgumentOutOfRangeException">A required argument is blank, null or empty.</exception>
/// <exception cref="FileNotFoundException"><paramref name="pdbFilename"/> does not exist.</exception>
public static List<ProteinInterfaceSequenceAndPositionData> AnalyseProteinInterfacesSequenceAndPositionData(
    string pdbFilename,
    Dictionary<string, List<string>> pdbIdChainIdList,
    ProteinChainListContainer pdbFileChains,
    ProteinChainListContainer singularAaToAaInteractions,
    ClusteringFullResultListContainer proteinInterfacesClusteringResult,
    int[] detectedBestStages,
    InteractionBetweenProteinInterfacesListContainer interactionBetweenProteinInterfacesContainer)
{
    if (string.IsNullOrWhiteSpace(pdbFilename))
    {
        throw new ArgumentOutOfRangeException(nameof(pdbFilename));
    }

    if (!File.Exists(pdbFilename))
    {
        throw new FileNotFoundException("File not found", pdbFilename);
    }

    if (ParameterValidation.IsProteinChainListContainerNullOrEmpty(singularAaToAaInteractions))
    {
        throw new ArgumentOutOfRangeException(nameof(singularAaToAaInteractions));
    }

    if (ParameterValidation.IsClusteringFullResultListContainerNullOrEmpty(proteinInterfacesClusteringResult))
    {
        throw new ArgumentOutOfRangeException(nameof(proteinInterfacesClusteringResult));
    }

    if (ParameterValidation.IsIntArrayNullOrEmpty(detectedBestStages))
    {
        throw new ArgumentOutOfRangeException(nameof(detectedBestStages));
    }

    const string placeholder1L = "_";
    const string placeholder3L = "___";

    // ProteinInterfaces are clusters with non-proteinInterfaces removed.
    var result = new List<ProteinInterfaceSequenceAndPositionData>();

    string proteinId = ProteinDataBankFileOperations.PdbIdFromPdbFilename(pdbFilename);

    // Fallback chain list re-read from the PDB file. The arguments of the load never change inside this method,
    // so it is loaded at most once (on first need) instead of once per missing residue.
    ProteinChainListContainer fallbackPdbFileChains = null;
    bool fallbackPdbFileChainsLoaded = false;

    int totalChains = proteinInterfacesClusteringResult.ChainList.Count;

    for (int chainIndex = 0; chainIndex < totalChains; chainIndex++)
    {
        int stageIndex = detectedBestStages[chainIndex];

        string chainIdLetter = SpreadsheetFileHandler.AlphabetLetterRollOver(chainIndex);

        List<ClusteringFullResultListContainer.Chain.Stage.Cluster> proteinInterfaceList = proteinInterfacesClusteringResult.ChainList[chainIndex].StageList[stageIndex].ClusterList;

        List<ClusteringFullResultListContainer.Chain.Stage.Cluster> nonEmptyProteinInterfaceList = proteinInterfaceList.Where(a => a != null && a.AtomIndexList != null && a.AtomIndexList.Count > 0).ToList();

        // Loop through each proteinInterface. The index used from here on is the position in the non-empty list.
        for (int proteinInterfaceIndex = 0; proteinInterfaceIndex < nonEmptyProteinInterfaceList.Count; proteinInterfaceIndex++)
        {
            ClusteringFullResultListContainer.Chain.Stage.Cluster proteinInterface = nonEmptyProteinInterfaceList[proteinInterfaceIndex];

            // Find min and max residue sequence index value in the proteinInterface
            MinMax proteinInterfaceResidueSequenceIndexes = MinMaxResidueSequenceIndex(proteinInterface, singularAaToAaInteractions, chainIndex);

            int proteinInterfaceLength = CalculateProteinInterfaceLength(proteinInterfaceResidueSequenceIndexes.Min, proteinInterfaceResidueSequenceIndexes.Max);

            var proteinInterfacePositionData = new ProteinInterfaceSequenceAndPositionData
            {
                FullProteinInterfaceId = new FullProteinInterfaceId(proteinId, chainIndex, proteinInterfaceIndex, proteinInterfaceResidueSequenceIndexes.Min, proteinInterfaceResidueSequenceIndexes.Max),
                ChainIdLetter = chainIdLetter,
                ProteinInterfaceIdLetter = SpreadsheetFileHandler.AlphabetLetterRollOver(proteinInterfaceIndex),
                StartPosition = proteinInterfaceResidueSequenceIndexes.Min,
                EndPosition = proteinInterfaceResidueSequenceIndexes.Max,
                ProteinInterfaceLength = proteinInterfaceLength,
                AminoAcidSequenceAllResidueSequenceIndexes = new ProteinInterfaceAminoAcidMetaData[proteinInterfaceLength],
                AminoAcidSequenceAll1L = "",
                AminoAcidSequenceInteractionsAll1L = "",
                AminoAcidSequenceInteractionsInsideProteinInterfacesOnly1L = "",
                AminoAcidSequenceInteractionsNone1L = "",
                AminoAcidSequenceInteractionsOutsideProteinInterfacesOnly1L = "",
                AminoAcidSequenceAll3L = "",
                AminoAcidSequenceInteractionsAll3L = "",
                AminoAcidSequenceInteractionsInsideProteinInterfacesOnly3L = "",
                AminoAcidSequenceInteractionsNone3L = "",
                AminoAcidSequenceInteractionsOutsideProteinInterfacesOnly3L = ""
            };

            for (int residueSequenceIndex = proteinInterfaceResidueSequenceIndexes.Min; residueSequenceIndex <= proteinInterfaceResidueSequenceIndexes.Max; residueSequenceIndex++)
            {
                // Questions answered for each residue:
                // 1. does this residue interact with another residue which is also part of a proteinInterface?
                // 2. if not, does this residue interact at all?
                var proteinInterfaceAminoAcidMetaData = new ProteinInterfaceAminoAcidMetaData();

                // The slot is the current length of the one-letter "all" sequence.
                // NOTE(review): this relies on exactly one character being appended to that sequence per residue - confirm.
                proteinInterfacePositionData.AminoAcidSequenceAllResidueSequenceIndexes[proteinInterfacePositionData.AminoAcidSequenceAll1L.Length] = proteinInterfaceAminoAcidMetaData;

                ATOM_Record foundAtomInsidePdbFile = AtomSearchMethods.FindAtomInsidePdbFileChain(pdbFileChains, chainIndex, residueSequenceIndex);

                if (foundAtomInsidePdbFile == null)
                {
                    // Non-CA atom is loaded here in case of missing CA atom to find the AA code for the resSeq index
                    if (!fallbackPdbFileChainsLoaded)
                    {
                        string[] chainIdList = null;
                        List<string> chainIds;

                        if (pdbIdChainIdList != null && pdbIdChainIdList.TryGetValue(proteinId, out chainIds))
                        {
                            chainIdList = chainIds.ToArray();
                        }

                        fallbackPdbFileChains = ProteinDataBankFileOperations.PdbAtomicChains(pdbFilename, chainIdList, -1, -1, false);
                        fallbackPdbFileChainsLoaded = true;
                    }

                    foundAtomInsidePdbFile = AtomSearchMethods.FindAtomInsidePdbFileChain(fallbackPdbFileChains, chainIndex, residueSequenceIndex);
                }

                proteinInterfaceAminoAcidMetaData.PdbResidueSequenceIndex = residueSequenceIndex;

                // IndexOf yields -1 when the atom is null or only exists in the fallback chain list.
                proteinInterfaceAminoAcidMetaData.ArrayMemberIndex = pdbFileChains.ChainList[chainIndex].AtomList.IndexOf(foundAtomInsidePdbFile);
                proteinInterfaceAminoAcidMetaData.OppoproteinInterfaceInteractions = new bool[proteinInterfaceLength];

                if (foundAtomInsidePdbFile == null)
                {
                    // Residue not present in the file at all: every sequence receives a placeholder.
                    proteinInterfacePositionData.AminoAcidSequenceAll1L += placeholder1L;
                    proteinInterfacePositionData.AminoAcidSequenceInteractionsAll1L += placeholder1L;
                    proteinInterfacePositionData.AminoAcidSequenceInteractionsInsideProteinInterfacesOnly1L += placeholder1L;
                    proteinInterfacePositionData.AminoAcidSequenceInteractionsOutsideProteinInterfacesOnly1L += placeholder1L;
                    proteinInterfacePositionData.AminoAcidSequenceInteractionsNone1L += placeholder1L;

                    proteinInterfacePositionData.AminoAcidSequenceAll3L += placeholder3L;
                    proteinInterfacePositionData.AminoAcidSequenceInteractionsAll3L += placeholder3L;
                    proteinInterfacePositionData.AminoAcidSequenceInteractionsInsideProteinInterfacesOnly3L += placeholder3L;
                    proteinInterfacePositionData.AminoAcidSequenceInteractionsOutsideProteinInterfacesOnly3L += placeholder3L;
                    proteinInterfacePositionData.AminoAcidSequenceInteractionsNone3L += placeholder3L;

                    proteinInterfaceAminoAcidMetaData.ProteinInterfaceInteractionType = ProteinInterfaceInteractionType.NoInteractionFound;
                    proteinInterfaceAminoAcidMetaData.NonProteinInterfaceInteractionResidueNames1L += placeholder1L;
                    proteinInterfaceAminoAcidMetaData.NonProteinInterfaceInteractionResidueNames3L += placeholder3L;
                    continue;
                }

                string residueCode1L = AminoAcidConversions.AminoAcidNameToCode1L(foundAtomInsidePdbFile.resName.FieldValue);
                string residueCode3L = foundAtomInsidePdbFile.resName.FieldValue.PadRight(3, '_');

                proteinInterfacePositionData.AminoAcidSequenceAll1L += residueCode1L;
                proteinInterfacePositionData.AminoAcidSequenceAll3L += residueCode3L;

                List<ATOM_Record> foundAtomInteractingWithAnotherProteinInterface = AtomSearchMethods.FindAtomInteractingWithOtherProteinInterfaces(foundAtomInsidePdbFile, interactionBetweenProteinInterfacesContainer, FindAtomInteractingWithAnotherProteinInterfaceOptions.FindAtomsInteractingWithOtherProteinInterfaces);
                List<ATOM_Record> foundAtomInteractingWithNonProteinInterface = AtomSearchMethods.FindAtomInteractingWithOtherProteinInterfaces(foundAtomInsidePdbFile, interactionBetweenProteinInterfacesContainer, FindAtomInteractingWithAnotherProteinInterfaceOptions.FindAtomsInteractingWithNonProteinInterfaces);

                proteinInterfaceAminoAcidMetaData.OppoproteinInterfaceInteractions = AminoAcidInteractionVector(singularAaToAaInteractions, proteinInterfacesClusteringResult, detectedBestStages, interactionBetweenProteinInterfacesContainer, chainIndex, proteinInterfaceIndex, residueSequenceIndex);
                proteinInterfaceAminoAcidMetaData.ResidueName1L = residueCode1L;
                proteinInterfaceAminoAcidMetaData.ResidueName3L = residueCode3L;

                // Record the residue names of every interaction partner.
                if (foundAtomInteractingWithAnotherProteinInterface != null)
                {
                    foreach (ATOM_Record atom in foundAtomInteractingWithAnotherProteinInterface)
                    {
                        proteinInterfaceAminoAcidMetaData.ProteinInterfaceInteractionResidueNames1L += AminoAcidConversions.AminoAcidNameToCode1L(atom.resName.FieldValue);
                        proteinInterfaceAminoAcidMetaData.ProteinInterfaceInteractionResidueNames3L += atom.resName.FieldValue.PadRight(3, '_');
                    }
                }

                if (foundAtomInteractingWithNonProteinInterface != null)
                {
                    foreach (ATOM_Record atom in foundAtomInteractingWithNonProteinInterface)
                    {
                        proteinInterfaceAminoAcidMetaData.NonProteinInterfaceInteractionResidueNames1L += AminoAcidConversions.AminoAcidNameToCode1L(atom.resName.FieldValue);
                        proteinInterfaceAminoAcidMetaData.NonProteinInterfaceInteractionResidueNames3L += atom.resName.FieldValue.PadRight(3, '_');
                    }
                }

                bool interactsWithProteinInterface = foundAtomInteractingWithAnotherProteinInterface != null && foundAtomInteractingWithAnotherProteinInterface.Count > 0;
                bool interactsWithNonProteinInterface = foundAtomInteractingWithNonProteinInterface != null && foundAtomInteractingWithNonProteinInterface.Count > 0;
                bool interactsAtAll = interactsWithProteinInterface || interactsWithNonProteinInterface;

                // An interaction with another proteinInterface takes priority: such a residue is listed as "inside only",
                // never as "outside only", even when it also has a non-proteinInterface interaction.
                bool insideOnly = interactsWithProteinInterface;
                bool outsideOnly = !interactsWithProteinInterface && interactsWithNonProteinInterface;

                proteinInterfacePositionData.AminoAcidSequenceInteractionsAll1L += interactsAtAll ? residueCode1L : placeholder1L;
                proteinInterfacePositionData.AminoAcidSequenceInteractionsInsideProteinInterfacesOnly1L += insideOnly ? residueCode1L : placeholder1L;
                proteinInterfacePositionData.AminoAcidSequenceInteractionsOutsideProteinInterfacesOnly1L += outsideOnly ? residueCode1L : placeholder1L;
                proteinInterfacePositionData.AminoAcidSequenceInteractionsNone1L += interactsAtAll ? placeholder1L : residueCode1L;

                proteinInterfacePositionData.AminoAcidSequenceInteractionsAll3L += interactsAtAll ? residueCode3L : placeholder3L;
                proteinInterfacePositionData.AminoAcidSequenceInteractionsInsideProteinInterfacesOnly3L += insideOnly ? residueCode3L : placeholder3L;
                proteinInterfacePositionData.AminoAcidSequenceInteractionsOutsideProteinInterfacesOnly3L += outsideOnly ? residueCode3L : placeholder3L;
                proteinInterfacePositionData.AminoAcidSequenceInteractionsNone3L += interactsAtAll ? placeholder3L : residueCode3L;

                if (interactsWithProteinInterface)
                {
                    proteinInterfaceAminoAcidMetaData.ProteinInterfaceInteractionType = ProteinInterfaceInteractionType.InteractionWithAnotherProteinInterface;

                    if (interactsWithNonProteinInterface)
                    {
                        proteinInterfaceAminoAcidMetaData.ProteinInterfaceInteractionType |= ProteinInterfaceInteractionType.InteractionWithNonProteinInterface;
                    }
                }
                else if (interactsWithNonProteinInterface)
                {
                    proteinInterfaceAminoAcidMetaData.ProteinInterfaceInteractionType = ProteinInterfaceInteractionType.InteractionWithNonProteinInterface;
                }
                else
                {
                    proteinInterfaceAminoAcidMetaData.ProteinInterfaceInteractionType = ProteinInterfaceInteractionType.NoInteractionFound;
                }
            }

            result.Add(proteinInterfacePositionData);
        }
    }

    return result;
}
/// <summary>
/// Builds a flag vector for one residue of one proteinInterface. For every interaction in which the residue takes
/// part, the position reported by AtomIndexPositionInProteinInterface for the partner atom (within the partner's
/// own proteinInterface) is set to true.
/// </summary>
/// <param name="singularAaToAaInteractions">Interaction atoms per chain; passed through to the length and position helpers.</param>
/// <param name="proteinInterfacesClusteringResult">Clustering result whose clusters are the proteinInterfaces.</param>
/// <param name="detectedBestStages">For each chain index, the index of the stage whose cluster list is used.</param>
/// <param name="interactionsBetweenProteinInterfacesContainer">Interactions between proteinInterfaces to search.</param>
/// <param name="sourceChainIndex">Chain index of the residue.</param>
/// <param name="sourceProteinInterfaceIndex">ProteinInterface index of the residue.</param>
/// <param name="sourceResidueIndex">Value compared against the parsed resSeq field of the interacting atoms.</param>
/// <returns>A vector as long as the longest non-empty proteinInterface over all chains; all false when nothing matches.</returns>
public static bool[] AminoAcidInteractionVector(
    ProteinChainListContainer singularAaToAaInteractions,
    ClusteringFullResultListContainer proteinInterfacesClusteringResult,
    int[] detectedBestStages,
    InteractionBetweenProteinInterfacesListContainer interactionsBetweenProteinInterfacesContainer,
    int sourceChainIndex,
    int sourceProteinInterfaceIndex,
    int sourceResidueIndex)
{
    // Size the vector by the longest non-empty proteinInterface so that vectors of all residues have equal length.
    int longestProteinInterface = 0;

    for (int chain = 0; chain < proteinInterfacesClusteringResult.ChainList.Count; chain++)
    {
        List<ClusteringFullResultListContainer.Chain.Stage.Cluster> clusters = proteinInterfacesClusteringResult.ChainList[chain].StageList[detectedBestStages[chain]].ClusterList;

        // ProteinInterface indexes are positions within the non-empty clusters only.
        int populatedClusterCount = clusters.Count(c => c != null && c.AtomIndexList != null && c.AtomIndexList.Count > 0);

        for (int populatedIndex = 0; populatedIndex < populatedClusterCount; populatedIndex++)
        {
            int candidateLength = FindProteinInterfaceLength(singularAaToAaInteractions, proteinInterfacesClusteringResult, detectedBestStages, chain, populatedIndex);

            if (candidateLength > longestProteinInterface)
            {
                longestProteinInterface = candidateLength;
            }
        }
    }

    var vector = new bool[longestProteinInterface];

    // An interaction end belongs to the source when both its chain id and its proteinInterface id match.
    Func<InteractionBetweenProteinInterfaces, bool> firstEndIsSource = candidate =>
        candidate.Atom1.FullProteinInterfaceId.ChainId == sourceChainIndex &&
        candidate.Atom1.FullProteinInterfaceId.ProteinInterfaceId == sourceProteinInterfaceIndex;

    Func<InteractionBetweenProteinInterfaces, bool> secondEndIsSource = candidate =>
        candidate.Atom2.FullProteinInterfaceId.ChainId == sourceChainIndex &&
        candidate.Atom2.FullProteinInterfaceId.ProteinInterfaceId == sourceProteinInterfaceIndex;

    List<InteractionBetweenProteinInterfaces> sourceInteractions = interactionsBetweenProteinInterfacesContainer.InteractionBetweenProteinInterfacesList
        .Where(candidate => firstEndIsSource(candidate) || secondEndIsSource(candidate))
        .ToList();

    foreach (InteractionBetweenProteinInterfaces interaction in sourceInteractions)
    {
        // The residue must also match: the resSeq field of the source-side atom is compared to sourceResidueIndex.
        bool residueIsFirstEnd = firstEndIsSource(interaction) &&
                                 ProteinDataBankFileOperations.NullableTryParseInt32(interaction.Atom1.Atom.resSeq.FieldValue) == sourceResidueIndex;

        bool residueIsSecondEnd = !residueIsFirstEnd &&
                                  secondEndIsSource(interaction) &&
                                  ProteinDataBankFileOperations.NullableTryParseInt32(interaction.Atom2.Atom.resSeq.FieldValue) == sourceResidueIndex;

        if (!residueIsFirstEnd && !residueIsSecondEnd)
        {
            continue;
        }

        // Where in its own proteinInterface is the atom at the other end of the interaction?
        var partner = residueIsFirstEnd ? interaction.Atom2 : interaction.Atom1;

        int partnerPosition = AtomIndexPositionInProteinInterface(singularAaToAaInteractions, proteinInterfacesClusteringResult, detectedBestStages, partner.FullProteinInterfaceId.ChainId, partner.FullProteinInterfaceId.ProteinInterfaceId, partner.Atom);

        // NOTE(review): assumes the helper always returns an index inside the vector; a "not found" value would throw here - confirm.
        vector[partnerPosition] = true;
    }

    return vector;
}
/// <summary>
/// Runs the proteinInterface analysis for one PDB file: detects the proteinInterfaces in the clustering result,
/// selects the final stage of every chain, records per-chain clustering hierarchy data, finds the interactions
/// between proteinInterfaces and builds the sequence and position data.
/// </summary>
/// <param name="cancellationToken">Passed on to the search for interactions between proteinInterfaces.</param>
/// <param name="maxAtomInterationDistance">Passed on to the search for interactions between proteinInterfaces.</param>
/// <param name="minimumProteinInterfaceDensity">Passed on to the proteinInterface detection.</param>
/// <param name="pdbFilename">PDB file to analyse; also the source of the protein id.</param>
/// <param name="pdbIdChainIdList">Map from protein id to chain ids, passed on to the later steps.</param>
/// <param name="pdbFileChains">Chains loaded from the PDB file, passed on to the later steps.</param>
/// <param name="singularAaToAaInteractions">Interaction atoms per chain; its chain count drives the per-chain hierarchy data.</param>
/// <param name="fullClusteringResult">Full clustering result of all stages.</param>
/// <returns>The stage indexes, proteinInterface clusters, hierarchy data, interactions and sequence/position data.</returns>
/// <exception cref="ArgumentOutOfRangeException">The filename is invalid, or a container is null or empty.</exception>
public static ProteinInterfaceAnalysisResultData AnalyseProteinInterfaces(
    CancellationToken cancellationToken,
    decimal maxAtomInterationDistance,
    decimal minimumProteinInterfaceDensity,
    string pdbFilename,
    Dictionary<string, List<string>> pdbIdChainIdList,
    ProteinChainListContainer pdbFileChains,
    ProteinChainListContainer singularAaToAaInteractions,
    ClusteringFullResultListContainer fullClusteringResult)
{
    if (ParameterValidation.IsLoadFilenameInvalid(pdbFilename))
    {
        throw new ArgumentOutOfRangeException(nameof(pdbFilename));
    }

    if (ParameterValidation.IsProteinChainListContainerNullOrEmpty(singularAaToAaInteractions))
    {
        throw new ArgumentOutOfRangeException(nameof(singularAaToAaInteractions));
    }

    if (ParameterValidation.IsClusteringFullResultListContainerNullOrEmpty(fullClusteringResult))
    {
        throw new ArgumentOutOfRangeException(nameof(fullClusteringResult));
    }

    string pdbId = ProteinDataBankFileOperations.PdbIdFromPdbFilename(pdbFilename);

    // Step 1: find how many proteinInterfaces exist at each stage of each chain.
    List<List<int>> interfaceCountPerChainStage;
    ClusteringFullResultListContainer interfaceClusters = DetectProteinInterfaces(
        pdbId,
        singularAaToAaInteractions,
        fullClusteringResult,
        out interfaceCountPerChainStage,
        ClusteringProteinInterfaceDensityDetectionOptions.ResidueSequenceIndex,
        minimumProteinInterfaceDensity);

    // Step 2: find the last stage having the required number of proteinInterfaces.
    int[] finalStagePerChain = ProteinInterfaceTreeOptimalStageDetection.FindFinalProteinInterfaceStageIndexes(
        singularAaToAaInteractions,
        fullClusteringResult,
        interfaceClusters,
        interfaceCountPerChainStage);

    int chainCount = singularAaToAaInteractions.ChainList.Count;
    int[] interfaceCountPerChain = FindNumberProteinInterfacesPerChain(interfaceClusters, finalStagePerChain);

    // Step 3: one hierarchy record per chain; the stage is stored one-based next to the total stage count.
    List<InteractionProteinInterfaceClusteringHierarchyData> hierarchyPerChain = Enumerable
        .Range(0, chainCount)
        .Select(chain =>
        {
            int finalStage = finalStagePerChain[chain];
            string chainLetter = SpreadsheetFileHandler.AlphabetLetterRollOver(chain);

            return new InteractionProteinInterfaceClusteringHierarchyData(
                pdbId,
                chainLetter,
                interfaceCountPerChain[chain],
                finalStage + 1,
                fullClusteringResult.ChainList[chain].StageList.Count);
        })
        .ToList();

    // Step 4: interactions between the proteinInterfaces, then the sequence and position data built from them.
    InteractionBetweenProteinInterfacesListContainer interfaceInteractions = CrossProteinInterfaceInteractions.FindInteractionsBetweenAnyProteinInterfaces(
        cancellationToken,
        maxAtomInterationDistance,
        pdbFilename,
        pdbIdChainIdList,
        pdbFileChains,
        singularAaToAaInteractions,
        fullClusteringResult,
        interfaceClusters,
        finalStagePerChain);

    List<ProteinInterfaceSequenceAndPositionData> sequenceAndPositionData = AnalyseProteinInterfacesSequenceAndPositionData(
        pdbFilename,
        pdbIdChainIdList,
        pdbFileChains,
        singularAaToAaInteractions,
        interfaceClusters,
        finalStagePerChain,
        interfaceInteractions);

    return new ProteinInterfaceAnalysisResultData(
        finalStagePerChain,
        interfaceClusters,
        hierarchyPerChain,
        interfaceInteractions,
        sequenceAndPositionData);
}
/// <summary>
/// Finds every pair of atoms taken from two different chains whose 3D distance is greater than zero and not greater
/// than <paramref name="maxAtomInterationDistance"/>. Every unordered pair of chains is compared once, with the
/// atoms of the first chain divided over several tasks.
/// </summary>
/// <param name="cancellationToken">Token handed to <see cref="Task.Run(Func{List{AtomPair}}, CancellationToken)"/>; results of cancelled tasks are left out.</param>
/// <param name="maxAtomInterationDistance">Largest distance (inclusive) at which two atoms count as interacting.</param>
/// <param name="proteinId">Protein id stored on both sides of every returned pair.</param>
/// <param name="pdbIdChainIdList">Not used by this method.</param>
/// <param name="proteinFileChains">Chains whose atoms are compared.</param>
/// <param name="breakWhenFirstInteractionFound">When true, the tasks of a chain pair stop as soon as one of them has found a pair.</param>
/// <param name="totalThreads">Passed to the work division that splits the atoms of the first chain.</param>
/// <param name="sort">When true, the result is ordered by Atom1 resSeq, Atom1 serial, Atom2 resSeq, Atom2 serial.</param>
/// <param name="requiredChains">When greater than -1, the exact number of chains that must be present.</param>
/// <returns>
/// The interacting pairs (possibly empty), or null when the chain list is missing, the chain count differs from
/// <paramref name="requiredChains"/>, or any chain has no atoms.
/// </returns>
public static List<AtomPair> FindInteractions(CancellationToken cancellationToken, decimal maxAtomInterationDistance /*= 8.0m*/, string proteinId, Dictionary<string, List<string>> pdbIdChainIdList, ProteinChainListContainer proteinFileChains, bool breakWhenFirstInteractionFound = false, int totalThreads = -1, bool sort = true, int requiredChains = -1)
{
    // The interaction cache is switched off here; the load/save calls stay in place so it can be switched back on.
    bool useCache = false;

    if (useCache && !string.IsNullOrWhiteSpace(proteinId))
    {
        var cachedInteractions = InteractionsCache.LoadPdbInteractionCache(proteinId, requiredChains);

        if (cachedInteractions != null)
        {
            return cachedInteractions;
        }
    }

    // check required number of chains are found
    if (proteinFileChains == null || proteinFileChains.ChainList == null || (requiredChains > -1 && proteinFileChains.ChainList.Count != requiredChains))
    {
        return null;
    }

    // check that all chains have atoms
    if (proteinFileChains.ChainList.Any(chain => chain.AtomList == null || chain.AtomList.Count == 0))
    {
        return null;
    }

    int chainCount = proteinFileChains.ChainList.Count;

    // Make list of 3D positions of atoms.
    var positions = new List<Point3D>[chainCount];

    for (int chainIndex = 0; chainIndex < chainCount; chainIndex++)
    {
        positions[chainIndex] = Clustering.AtomRecordListToPoint3DList(proteinFileChains.ChainList[chainIndex]);
    }

    var tasks = new List<Task<List<AtomPair>>>();

    for (int chainIndexA = 0; chainIndexA < chainCount; chainIndexA++)
    {
        // Starting at chainIndexA + 1 visits each unordered chain pair once and never pairs a chain with itself.
        for (int chainIndexB = chainIndexA + 1; chainIndexB < chainCount; chainIndexB++)
        {
            WorkDivision<List<AtomPair>> workDivision = new WorkDivision<List<AtomPair>>(proteinFileChains.ChainList[chainIndexA].AtomList.Count, totalThreads);

            // Shared by the tasks of this chain pair only. It is accessed through Volatile so that a stop request
            // written by one task is guaranteed to be seen by the others; a lock around the write alone does not
            // give that guarantee to unlocked reads.
            bool stopRequested = false;

            for (int threadIndex = 0; threadIndex < workDivision.ThreadCount; threadIndex++)
            {
                // Copies for the closure: the loop variables themselves change while the tasks are running.
                int localThreadIndex = threadIndex;
                int localChainIndexA = chainIndexA;
                int localChainIndexB = chainIndexB;
                WorkDivision<List<AtomPair>> localWorkDivision = workDivision;

                Task<List<AtomPair>> task = Task.Run(() =>
                {
                    var taskResult = new List<AtomPair>();

                    for (int atomIndexA = localWorkDivision.ThreadFirstIndex[localThreadIndex]; atomIndexA <= localWorkDivision.ThreadLastIndex[localThreadIndex]; atomIndexA++)
                    {
                        if (Volatile.Read(ref stopRequested))
                        {
                            break;
                        }

                        for (int atomIndexB = 0; atomIndexB < proteinFileChains.ChainList[localChainIndexB].AtomList.Count; atomIndexB++)
                        {
                            if (Volatile.Read(ref stopRequested))
                            {
                                break;
                            }

                            // Atoms whose coordinates could not be parsed cannot be measured.
                            if ((!positions[localChainIndexA][atomIndexA].ParseOK) || (!positions[localChainIndexB][atomIndexB].ParseOK))
                            {
                                continue;
                            }

                            decimal atomicDistanceAngstroms3D = Point3D.Distance3D(positions[localChainIndexA][atomIndexA], positions[localChainIndexB][atomIndexB], true);

                            // Only distances above zero and up to the maximum count as an interaction.
                            if (atomicDistanceAngstroms3D <= 0.0m || atomicDistanceAngstroms3D > maxAtomInterationDistance)
                            {
                                continue;
                            }

                            var atomPair = new AtomPair(
                                proteinId,
                                proteinFileChains.ChainList[localChainIndexA].AtomList[atomIndexA],
                                localChainIndexA,
                                proteinId,
                                localChainIndexB,
                                proteinFileChains.ChainList[localChainIndexB].AtomList[atomIndexB],
                                atomicDistanceAngstroms3D);

                            taskResult.Add(atomPair);

                            if (breakWhenFirstInteractionFound)
                            {
                                // One pair is enough: stop this task and tell the other tasks of this chain pair.
                                Volatile.Write(ref stopRequested, true);
                                break;
                            }
                        }
                    }

                    // An empty result is reported as null; the merge below skips null results.
                    return taskResult.Count == 0 ? null : taskResult;
                }, cancellationToken);

                workDivision.TaskList.Add(task);
            }

            tasks.AddRange(workDivision.TaskList);
        }
    }

    try
    {
        Task[] tasksToWait = tasks.Where(task => task != null && !task.IsCompleted).ToArray<Task>();

        if (tasksToWait.Length > 0)
        {
            Task.WaitAll(tasksToWait);
        }
    }
    catch (AggregateException)
    {
        // Deliberately not rethrown: cancelled and faulted tasks are left out by the merge below,
        // so the caller receives the results of the tasks that did run to completion.
    }

    // merge all results
    var atomPairList = new List<AtomPair>();

    foreach (var task in tasks.Where(t => t != null && t.IsCompleted && !t.IsCanceled && !t.IsFaulted && t.Result != null && t.Result.Count > 0))
    {
        atomPairList.AddRange(task.Result);
    }

    if (sort && atomPairList.Count > 1)
    {
        atomPairList = atomPairList
            .OrderBy(i => ProteinDataBankFileOperations.NullableTryParseInt32(i.Atom1.resSeq.FieldValue))
            .ThenBy(i => ProteinDataBankFileOperations.NullableTryParseInt32(i.Atom1.serial.FieldValue))
            .ThenBy(j => ProteinDataBankFileOperations.NullableTryParseInt32(j.Atom2.resSeq.FieldValue))
            .ThenBy(j => ProteinDataBankFileOperations.NullableTryParseInt32(j.Atom2.serial.FieldValue))
            .ToList();
    }

    if (useCache)
    {
        InteractionsCache.SavePdbInteractionCache(proteinId, atomPairList, requiredChains);
    }

    return atomPairList;
}
/// <summary>
/// Lays out motif profile records as a two-dimensional spreadsheet. The sheet starts with a header row; each
/// record (ordered by its parsed "Total Found" value, highest first) then contributes an empty spacer row, a
/// record header row, one row per profile position and a final "Average" row. Profile values are formatted
/// with two decimals.
/// </summary>
/// <param name="motifProfileSpreadsheetRecordList">Records to write. Must not be null.</param>
/// <returns>The rows converted to a rectangular array.</returns>
/// <exception cref="ArgumentNullException"><paramref name="motifProfileSpreadsheetRecordList"/> is null.</exception>
public static string[,] Spreadsheet(List<MotifProfileSpreadsheetRecord> motifProfileSpreadsheetRecordList)
{
    if (motifProfileSpreadsheetRecordList == null)
    {
        throw new ArgumentNullException(nameof(motifProfileSpreadsheetRecordList));
    }

    const string profilePositionHeader = "Profile Position";

    var result = new List<string[]>();

    var sheetHeader = new List<string>()
    {
        "Motif Name",
        "Motif Source",
        "Direction",
        "Total Found",
        profilePositionHeader,
    };

    sheetHeader.AddRange(AminoAcidConversions.AminoAcidCodeArray1L());
    result.Add(sheetHeader.ToArray());

    // Column positions are fixed once the header is built, so they are looked up once instead of per cell.
    int profilePositionColumn = sheetHeader.IndexOf(profilePositionHeader);
    int firstAminoAcidColumn = profilePositionColumn + 1;

    foreach (var record in motifProfileSpreadsheetRecordList.OrderByDescending(a => ProteinDataBankFileOperations.NullableTryParseInt32(a.TotalFound)))
    {
        // Empty spacer row between records.
        result.Add(new string[] { });

        var recordHeader = new List<string>()
        {
            record.MotifName,
            record.MotifSource,
            record.Direction,
            record.TotalFound,
            "",
        };

        recordHeader.AddRange(AminoAcidConversions.AminoAcidCodeArray1L());
        result.Add(recordHeader.ToArray());

        // One row per profile position, labelled with the one-based position.
        for (var positionIndex = 0; positionIndex < record.AminoAcidProfile.Length; positionIndex++)
        {
            var row = new string[sheetHeader.Count];
            row[profilePositionColumn] = (positionIndex + 1).ToString();

            for (var aaIndex = 0; aaIndex < record.AminoAcidProfile[positionIndex].Length; aaIndex++)
            {
                row[firstAminoAcidColumn + aaIndex] = $"{record.AminoAcidProfile[positionIndex][aaIndex]:0.00}";
            }

            result.Add(row);
        }

        var rowAverage = new string[sheetHeader.Count];
        rowAverage[profilePositionColumn] = "Average";

        for (var aaIndex = 0; aaIndex < record.AverageProfile.Length; aaIndex++)
        {
            rowAverage[firstAminoAcidColumn + aaIndex] = $"{record.AverageProfile[aaIndex]:0.00}";
        }

        result.Add(rowAverage);
    }

    return ConvertTypes.StringJagged2DArrayTo2DArray(result.ToArray());
}
/// <summary>
/// Reads a PDB file and returns the column-format lines of the records to keep. ATOM, HETATM and ANISOU records
/// are kept when their chain and residue sequence number fall inside one of the given proteinInterfaces; LINK
/// records when both residue sequence numbers fall inside the same proteinInterface range; TER records when their
/// chain has a proteinInterface. None of these are kept once an ENDMDL record has been read. MODEL and ENDMDL
/// records are never written. Every other record is kept unless an ENDMDL has been read and the model nesting
/// level is not zero.
/// </summary>
/// <param name="pdbFilename">PDB file to read.</param>
/// <param name="proteinInterfaceIdList">ProteinInterfaces (chain id with first and last position) to keep. Must not be null.</param>
/// <returns>The kept lines, in file order.</returns>
/// <exception cref="ArgumentNullException"><paramref name="proteinInterfaceIdList"/> is null.</exception>
public static List<string> RemoveNonProteinInterfaceRecords(string pdbFilename, List<ProteinInterfaceId> proteinInterfaceIdList)
{
    if (proteinInterfaceIdList == null)
    {
        throw new ArgumentNullException(nameof(proteinInterfaceIdList));
    }

    var proteinDataBankFile = new ProteinDataBankFormat.ProteinDataBankFile(pdbFilename);

    var result = new List<string>();

    var foundEndModel = false;
    var levelModel = 0;

    // True when the chain letter and residue sequence number lie inside at least one proteinInterface.
    Func<string, int?, bool> isInsideAnyProteinInterface = (chainId, resSeq) =>
        proteinInterfaceIdList.Any(s => SpreadsheetFileHandler.AlphabetLetterRollOver(s.ChainId) == chainId && resSeq >= s.FirstPosition && resSeq <= s.LastPosition);

    for (var proteinDataBankFileRecordIndex = 0; proteinDataBankFileRecordIndex < proteinDataBankFile.Count; proteinDataBankFileRecordIndex++)
    {
        var record = proteinDataBankFile.NextRecord();

        if (record == null)
        {
            continue;
        }

        // Exact type comparison (not "is"), so a record type derived from another is not matched by its base.
        var recordType = record.GetType();

        if (recordType == typeof(MODEL_Record))
        {
            levelModel++;
        }
        else if (recordType == typeof(ENDMDL_Record))
        {
            foundEndModel = true;
            levelModel--;
        }
        else if (recordType == typeof(ATOM_Record))
        {
            var atom = (ATOM_Record)record;
            var atomResSeq = ProteinDataBankFileOperations.NullableTryParseInt32(atom.resSeq.FieldValue);

            if (atomResSeq == null)
            {
                continue;
            }

            var atomChain = atom.chainID.FieldValue;

            if (!foundEndModel && isInsideAnyProteinInterface(atomChain, atomResSeq))
            {
                result.Add(record.ColumnFormatLine);
            }
        }
        else if (recordType == typeof(HETATM_Record))
        {
            var hetatm = (HETATM_Record)record;
            var atomResSeq = ProteinDataBankFileOperations.NullableTryParseInt32(hetatm.resSeq.FieldValue);

            if (atomResSeq == null)
            {
                continue;
            }

            var atomChain = hetatm.chainID.FieldValue;

            if (!foundEndModel && isInsideAnyProteinInterface(atomChain, atomResSeq))
            {
                result.Add(record.ColumnFormatLine);
            }
        }
        else if (recordType == typeof(LINK_Record))
        {
            var link = (LINK_Record)record;
            var atomResSeq1 = ProteinDataBankFileOperations.NullableTryParseInt32(link.resSeq1.FieldValue);
            var atomResSeq2 = ProteinDataBankFileOperations.NullableTryParseInt32(link.resSeq2.FieldValue);

            if (atomResSeq1 == null || atomResSeq2 == null)
            {
                continue;
            }

            // Both ends must lie inside the position range of one and the same proteinInterface.
            // NOTE(review): chain ids are not compared for LINK records, unlike the other record types - confirm this is intended.
            if (!foundEndModel && proteinInterfaceIdList.Any(s => atomResSeq1 >= s.FirstPosition && atomResSeq1 <= s.LastPosition && atomResSeq2 >= s.FirstPosition && atomResSeq2 <= s.LastPosition))
            {
                result.Add(record.ColumnFormatLine);
            }
        }
        else if (recordType == typeof(ANISOU_Record))
        {
            var anisou = (ANISOU_Record)record;
            var atomResSeq = ProteinDataBankFileOperations.NullableTryParseInt32(anisou.resSeq.FieldValue);

            if (atomResSeq == null)
            {
                continue;
            }

            var atomChain = anisou.chainID.FieldValue;

            if (!foundEndModel && isInsideAnyProteinInterface(atomChain, atomResSeq))
            {
                result.Add(record.ColumnFormatLine);
            }
        }
        else if (recordType == typeof(TER_Record))
        {
            var ter = (TER_Record)record;
            var atomChain = ter.chainID.FieldValue;

            // TER records carry no usable position here; the chain alone decides.
            if (!foundEndModel && proteinInterfaceIdList.Any(s => SpreadsheetFileHandler.AlphabetLetterRollOver(s.ChainId) == atomChain))
            {
                result.Add(record.ColumnFormatLine);
            }
        }
        else
        {
            // Any other record type is kept, except between models once the first ENDMDL has been read.
            if (!foundEndModel || levelModel == 0)
            {
                result.Add(record.ColumnFormatLine);
            }
        }
    }

    return result;
}
/// <summary>
/// Clusters every PDB file whose id appears in the loaded sequence files and returns two
/// <c>VectorProteinInterfaceWhole</c> entries per detected protein interface: one built with the
/// third argument of <c>MakeVectorProteinInterfaceWhole</c> set to false (forward) and one with it
/// set to true (reverse).
/// </summary>
/// <param name="cancellationToken">Forwarded to <c>Clustering.ClusterProteinDataBankFile</c>.</param>
/// <param name="maxAtomInterationDistance">Forwarded to the clustering call.</param>
/// <param name="minimumProteinInterfaceDensity">Forwarded to the clustering call.</param>
/// <param name="sequenceListFileArray">Sequence files from which the sequences and their PDB ids are loaded.</param>
/// <param name="pdbFileDirectoryLocationArray">Locations handed to <c>ProteinDataBankFileOperations.GetPdbFilesArray</c> to find PDB files.</param>
/// <param name="progressActionSet">Receives start, per-file progress, estimated-time-remaining and finish notifications.</param>
/// <returns>
/// A list (not a dictionary) of interface vectors ordered by protein id, then chain id, then
/// protein interface id. <c>FullSequenceLength</c> is set to the matching sequence's
/// <c>Count</c>, or -1 when no sequence matches the interface's PDB id and chain letter.
/// </returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="sequenceListFileArray"/> or <paramref name="pdbFileDirectoryLocationArray"/> is null.
/// </exception>
/// <exception cref="ArgumentOutOfRangeException">No PDB ids could be derived from the sequence files.</exception>
public static List<VectorProteinInterfaceWhole> LoadProteinInterfaceVectorFromFiles(
    CancellationToken cancellationToken,
    decimal maxAtomInterationDistance,
    decimal minimumProteinInterfaceDensity,
    string[] sequenceListFileArray,
    string[] pdbFileDirectoryLocationArray,
    ProgressActionSet progressActionSet)
{
    if (sequenceListFileArray == null)
    {
        throw new ArgumentNullException(nameof(sequenceListFileArray));
    }

    if (pdbFileDirectoryLocationArray == null)
    {
        throw new ArgumentNullException(nameof(pdbFileDirectoryLocationArray));
    }

    var vectorProteinInterfaceWholeList = new List<VectorProteinInterfaceWhole>();

    // 1: Load the sequences from the given sequence files.
    List<ISequence> sequenceList = SequenceFileHandler.LoadSequenceFileList(sequenceListFileArray, StaticValues.MolNameProteinAcceptedValues);

    var pdbIdChainIdList = ProteinDataBankFileOperations.PdbIdChainIdList(sequenceList);

    // 2: Get a list of the unique ids for the sequences.
    List<string> pdbIdList = FilterProteins.SequenceListToPdbIdList(sequenceList);

    if (pdbIdList == null || pdbIdList.Count == 0)
    {
        throw new ArgumentOutOfRangeException(nameof(sequenceListFileArray), "Error loading PDB ID list");
    }

    // Membership is tested once per PDB file; a set avoids a linear scan each time while keeping
    // the same (ordinal, case-sensitive) equality List<string>.Contains used.
    var knownPdbIds = new HashSet<string>(pdbIdList);

    // 3: Get a list of PDB files found in the user specified directories.
    string[] pdbFilesArray = ProteinDataBankFileOperations.GetPdbFilesArray(pdbFileDirectoryLocationArray);

    ProgressActionSet.StartAction(pdbFilesArray.Length, progressActionSet);

    // Kept as local-time ticks: EstimatedTimeRemainingAction is opaque here and may compare
    // against DateTime.Now itself.
    var startTicks = DateTime.Now.Ticks;

    // 4: Loop through each pdb file.
    for (int pdbFileNumber = 0; pdbFileNumber < pdbFilesArray.Length; pdbFileNumber++)
    {
        // Progress is reported for every file, including the ones skipped below.
        ProgressActionSet.ProgressAction(1, progressActionSet);
        ProgressActionSet.EstimatedTimeRemainingAction(startTicks, pdbFileNumber + 1, pdbFilesArray.Length, progressActionSet);

        string pdbFilename = pdbFilesArray[pdbFileNumber];
        string proteinId = ProteinDataBankFileOperations.PdbIdFromPdbFilename(pdbFilename);

        // Only process files whose id was in the loaded sequence list.
        if (!knownPdbIds.Contains(proteinId))
        {
            continue;
        }

        ClusterProteinDataBankFileResult clusterPdbFileResult = Clustering.ClusterProteinDataBankFile(cancellationToken, maxAtomInterationDistance, minimumProteinInterfaceDensity, pdbFilename, pdbIdChainIdList, ClusteringMethodOptions.ClusterWithResidueSequenceIndex, -1, -1, progressActionSet);

        if (clusterPdbFileResult == null)
        {
            continue;
        }

        List<ProteinInterfaceSequenceAndPositionData> interfaceDataList = clusterPdbFileResult.ProteinInterfaceAnalysisResultData.ProteinInterfacesSequenceAndPositionDataList;

        // A result without an interface list is skipped like a missing result, instead of
        // failing inside OrderBy with an ArgumentNullException.
        if (interfaceDataList == null)
        {
            continue;
        }

        var orderedInterfaceData = interfaceDataList
            .OrderBy(a => a.FullProteinInterfaceId.ProteinId)
            .ThenBy(a => a.FullProteinInterfaceId.ChainId)
            .ThenBy(a => a.FullProteinInterfaceId.ProteinInterfaceId)
            .ToList();

        foreach (ProteinInterfaceSequenceAndPositionData interfaceData in orderedInterfaceData)
        {
            var interfacePdbId = interfaceData.FullProteinInterfaceId.ProteinId;
            var interfaceChainLetter = interfaceData.ChainIdLetter;

            // OrdinalIgnoreCase gives the same result as comparing ToUpperInvariant() copies, but
            // without allocating four strings per candidate and without throwing on a null id.
            var seq = sequenceList.FirstOrDefault(a =>
            {
                var p = SequenceIdSplit.SequenceIdToPdbIdAndChainId(a.ID);

                return string.Equals(p.PdbId, interfacePdbId, StringComparison.OrdinalIgnoreCase) &&
                       string.Equals(p.ChainId, interfaceChainLetter, StringComparison.OrdinalIgnoreCase);
            });

            // -1 marks "no matching sequence found".
            var seqLen = seq != null ? seq.Count : -1;

            var vectorProteinInterfaceWholeFwd = MakeVectorProteinInterfaceWhole(pdbFilename, interfaceData, false, false);
            vectorProteinInterfaceWholeFwd.FullSequenceLength = seqLen;
            vectorProteinInterfaceWholeList.Add(vectorProteinInterfaceWholeFwd);

            var vectorProteinInterfaceWholeRev = MakeVectorProteinInterfaceWhole(pdbFilename, interfaceData, true, false);
            vectorProteinInterfaceWholeRev.FullSequenceLength = seqLen;
            vectorProteinInterfaceWholeList.Add(vectorProteinInterfaceWholeRev);
        }
    }

    ProgressActionSet.FinishAction(true, progressActionSet);

    vectorProteinInterfaceWholeList = vectorProteinInterfaceWholeList
        .OrderBy(a => a.FullProteinInterfaceId.ProteinId)
        .ThenBy(a => a.FullProteinInterfaceId.ChainId)
        .ThenBy(a => a.FullProteinInterfaceId.ProteinInterfaceId)
        .ToList();

    return vectorProteinInterfaceWholeList;
}