Exemplo n.º 1
0
        /// <summary>
        ///     Scans one chain of <paramref name="singularAaToAaInteractions" /> and returns the first atom whose parsed
        ///     residue sequence number equals <paramref name="residueSequenceIndex" />.
        /// </summary>
        /// <param name="singularAaToAaInteractions">Container whose <c>ChainList</c> is searched.</param>
        /// <param name="chainIndex">Index into <c>ChainList</c> selecting the chain to scan.</param>
        /// <param name="residueSequenceIndex">Residue sequence number to look for.</param>
        /// <returns>
        ///     The first matching atom; <c>null</c> when the container fails the null-or-empty validation or no atom
        ///     matches.
        /// </returns>
        /// <exception cref="ArgumentOutOfRangeException">
        ///     <paramref name="chainIndex" /> or <paramref name="residueSequenceIndex" /> is rejected by
        ///     <c>ParameterValidation</c>.
        /// </exception>
        public static ATOM_Record FindAtomInsideSingularInteractionsChain(ProteinChainListContainer singularAaToAaInteractions, int chainIndex, int residueSequenceIndex)
        {
            // An unusable container is treated as "nothing found" rather than as an error.
            if (ParameterValidation.IsProteinChainListContainerNullOrEmpty(singularAaToAaInteractions))
            {
                return null;
            }

            if (ParameterValidation.IsChainIndexInvalid(chainIndex))
            {
                throw new ArgumentOutOfRangeException(nameof(chainIndex));
            }

            if (ParameterValidation.IsResidueSequenceIndexInvalid(residueSequenceIndex, true))
            {
                throw new ArgumentOutOfRangeException(nameof(residueSequenceIndex));
            }

            foreach (ATOM_Record candidate in singularAaToAaInteractions.ChainList[chainIndex].AtomList)
            {
                // An unparsable resSeq yields null, which never equals the requested number.
                var candidateResidueNumber = ProteinDataBankFileOperations.NullableTryParseInt32(candidate.resSeq.FieldValue);

                if (candidateResidueNumber == residueSequenceIndex)
                {
                    return candidate;
                }
            }

            return null;
        }
        /// <summary>
        ///     Builds a two-dimensional spreadsheet from motif hit records: the header row first, then one row per
        ///     record. Records are sorted in descending order by summed database hits, then summed database sequences,
        ///     then summed times suggested.
        /// </summary>
        /// <param name="motifHitSpreadsheetRecordList">Records to convert into rows.</param>
        /// <returns>The header and record rows as a rectangular string array.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="motifHitSpreadsheetRecordList" /> is null.</exception>
        public static string[,] MotifSpreadsheet(List <MotifHitSpreadsheetRecord> motifHitSpreadsheetRecordList)
        {
            if (motifHitSpreadsheetRecordList == null)
            {
                throw new ArgumentNullException(nameof(motifHitSpreadsheetRecordList));
            }

            // Blank counters are parsed as "0"; any other text goes to the parser unchanged.
            Func<string, int?> parseCount = text => ProteinDataBankFileOperations.NullableTryParseInt32(string.IsNullOrWhiteSpace(text) ? "0" : text);

            var sortedRecords = motifHitSpreadsheetRecordList
                .OrderByDescending(r => parseCount(r.TotalDatabaseHitsPdb)
                                        + parseCount(r.TotalDatabaseHitsUniProtKbSwissProt)
                                        + parseCount(r.TotalDatabaseHitsUniProtKbTrEmbl))
                .ThenByDescending(r => parseCount(r.TotalDatabaseSequencesPdb)
                                       + parseCount(r.TotalDatabaseSequencesUniProtKbSwissProt)
                                       + parseCount(r.TotalDatabaseSequencesUniProtKbTrEmbl))
                .ThenByDescending(r => parseCount(r.TotalTimesSuggestedFwd)
                                       + parseCount(r.TotalTimesSuggestedRev)
                                       + parseCount(r.TotalTimesSuggestedMix));

            var rows = new List<string[]>();

            rows.Add(Header().ToStrings());
            rows.AddRange(sortedRecords.Select(r => r.ToStrings()));

            return ConvertTypes.StringJagged2DArrayTo2DArray(rows.ToArray());
        }
Exemplo n.º 3
0
        /// <summary>
        ///     Looks through one chain of <paramref name="pdbFileChains" /> for the first atom whose parsed residue
        ///     sequence number equals <paramref name="residueSequenceIndex" />.
        /// </summary>
        /// <param name="pdbFileChains">Container whose <c>ChainList</c> is searched.</param>
        /// <param name="chainIndex">Index into <c>ChainList</c> selecting the chain to scan.</param>
        /// <param name="residueSequenceIndex">Residue sequence number to look for.</param>
        /// <returns>
        ///     The first matching atom; <c>null</c> when the container fails the null-or-empty validation or no atom
        ///     matches.
        /// </returns>
        /// <exception cref="ArgumentOutOfRangeException">
        ///     <paramref name="chainIndex" /> or <paramref name="residueSequenceIndex" /> is rejected by
        ///     <c>ParameterValidation</c>.
        /// </exception>
        public static ATOM_Record FindAtomInsidePdbFileChain(ProteinChainListContainer pdbFileChains, int chainIndex, int residueSequenceIndex)
        {
            if (ParameterValidation.IsProteinChainListContainerNullOrEmpty(pdbFileChains))
            {
                return null;
            }

            if (ParameterValidation.IsChainIndexInvalid(chainIndex))
            {
                throw new ArgumentOutOfRangeException(nameof(chainIndex));
            }

            if (ParameterValidation.IsResidueSequenceIndexInvalid(residueSequenceIndex, true))
            {
                throw new ArgumentOutOfRangeException(nameof(residueSequenceIndex));
            }

            var chainAtoms = pdbFileChains.ChainList[chainIndex].AtomList;

            for (int position = 0; position < chainAtoms.Count; position++)
            {
                ATOM_Record current = chainAtoms[position];
                var currentResidueNumber = ProteinDataBankFileOperations.NullableTryParseInt32(current.resSeq.FieldValue);

                // Unparsable (null) residue numbers never match and are skipped as well.
                if (currentResidueNumber != residueSequenceIndex)
                {
                    continue;
                }

                return current;
            }

            return null;
        }
Exemplo n.º 4
0
        /// <summary>
        ///     Get the lowest (minimum) and highest (maximum) residue sequence index (as found in the pdb file) among the
        ///     atoms referenced by an interaction proteinInterface.
        /// </summary>
        /// <param name="proteinInterface">Cluster whose <c>AtomIndexList</c> holds indexes into the chain's atom list.</param>
        /// <param name="singularAaToAaInteractions">Container providing the atom list of each chain.</param>
        /// <param name="chainIndex">Index of the chain whose atom list the cluster's atom indexes refer to.</param>
        /// <returns>
        ///     The minimum and maximum parsed residue sequence numbers. Atoms whose residue sequence number cannot be
        ///     parsed are ignored; when no atom has a parsable number both values are 0.
        /// </returns>
        /// <exception cref="ArgumentNullException">
        ///     <paramref name="proteinInterface" /> or <paramref name="singularAaToAaInteractions" /> fails its
        ///     null-or-empty validation.
        /// </exception>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="chainIndex" /> is rejected by validation.</exception>
        public static MinMax MinMaxResidueSequenceIndex(ClusteringFullResultListContainer.Chain.Stage.Cluster proteinInterface, ProteinChainListContainer singularAaToAaInteractions, int chainIndex)
        {
            if (ParameterValidation.IsClusterNullOrEmpty(proteinInterface))
            {
                throw new ArgumentNullException(nameof(proteinInterface));
            }

            if (ParameterValidation.IsProteinChainListContainerNullOrEmpty(singularAaToAaInteractions))
            {
                throw new ArgumentNullException(nameof(singularAaToAaInteractions));
            }

            if (ParameterValidation.IsChainIndexInvalid(chainIndex))
            {
                throw new ArgumentOutOfRangeException(nameof(chainIndex));
            }

            int proteinInterfaceMin = 0;
            int proteinInterfaceMax = 0;

            // Tracks whether min/max have been seeded. Seeding on "first list position" is wrong when that first
            // atom has an unparsable residue number: the range would then be compared against the 0 placeholders.
            bool hasValue = false;

            for (int memberIndex = 0; memberIndex < proteinInterface.AtomIndexList.Count; memberIndex++)
            {
                int member = proteinInterface.AtomIndexList[memberIndex];

                ATOM_Record atom = singularAaToAaInteractions.ChainList[chainIndex].AtomList[member];

                var residueSequenceIndex = ProteinDataBankFileOperations.NullableTryParseInt32(atom.resSeq.FieldValue);

                if (residueSequenceIndex == null)
                {
                    continue;
                }

                int value = residueSequenceIndex.Value;

                if (!hasValue)
                {
                    proteinInterfaceMin = value;
                    proteinInterfaceMax = value;
                    hasValue = true;
                    continue;
                }

                if (value < proteinInterfaceMin)
                {
                    proteinInterfaceMin = value;
                }

                if (value > proteinInterfaceMax)
                {
                    proteinInterfaceMax = value;
                }
            }

            return(new MinMax(proteinInterfaceMin, proteinInterfaceMax));
        }
        /// <summary>
        ///     Loads the atomic chains of a PDB file and delegates to the chain-based <c>FindInteractions</c> overload
        ///     to find chemical interaction bonds between atoms on separate chains.
        /// </summary>
        /// <param name="cancellationToken">Forwarded to the chain-based overload.</param>
        /// <param name="maxAtomInterationDistance">Distance limit forwarded to the chain-based overload.</param>
        /// <param name="pdbFilename">The filename of the PDB file to parse for chemical interactions.</param>
        /// <param name="pdbIdChainIdList">
        ///     Optional map from PDB id to chain ids; when it has an entry for this file's id, those chain ids are passed
        ///     to the chain loader.
        /// </param>
        /// <param name="breakWhenFirstInteractionFound">Forwarded to the chain-based overload.</param>
        /// <param name="totalThreads">Forwarded to the chain-based overload.</param>
        /// <param name="sort">Forwarded to the chain-based overload.</param>
        /// <param name="requiredChains">Passed to the chain loader and forwarded to the chain-based overload.</param>
        /// <returns>
        ///     The list produced by the chain-based overload, or an empty list when that overload returns <c>null</c>.
        /// </returns>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="pdbFilename" /> fails filename validation.</exception>
        public static List <AtomPair> FindInteractions(CancellationToken cancellationToken, decimal maxAtomInterationDistance /*= 8.0m*/, string pdbFilename, Dictionary <string, List <string> > pdbIdChainIdList, bool breakWhenFirstInteractionFound = false, int totalThreads = -1, bool sort = true, int requiredChains = -1)
        {
            if (ParameterValidation.IsLoadFilenameInvalid(pdbFilename))
            {
                throw new ArgumentOutOfRangeException(nameof(pdbFilename));
            }

            string proteinId = ProteinDataBankFileOperations.PdbIdFromPdbFilename(pdbFilename);

            // The interaction cache is currently switched off; both cache branches below are inert.
            bool useCache = false;

            if (useCache && !string.IsNullOrWhiteSpace(proteinId))
            {
                var cachedInteractions = InteractionsCache.LoadPdbInteractionCache(proteinId, requiredChains);

                if (cachedInteractions != null)
                {
                    return cachedInteractions;
                }
            }

            // Restrict loading to the chains listed for this protein id, when such a listing exists.
            string[] chainIdList = null;
            List<string> listedChainIds;

            if (pdbIdChainIdList != null && proteinId != null && pdbIdChainIdList.TryGetValue(proteinId, out listedChainIds))
            {
                chainIdList = listedChainIds.ToArray();
            }

            ProteinChainListContainer proteinFileChains = ProteinDataBankFileOperations.PdbAtomicChains(pdbFilename, chainIdList, requiredChains, requiredChains, true);

            List<AtomPair> atomPairList = FindInteractions(cancellationToken, maxAtomInterationDistance, proteinId, pdbIdChainIdList, proteinFileChains, breakWhenFirstInteractionFound, totalThreads, sort, requiredChains);

            if (atomPairList != null)
            {
                return atomPairList;
            }

            // Only the null case is saved here; a non-null result is (per the original note) already saved by the
            // chain-based overload.
            atomPairList = new List<AtomPair>();

            if (useCache)
            {
                InteractionsCache.SavePdbInteractionCache(proteinId, atomPairList, requiredChains);
            }

            return atomPairList;
        }
Exemplo n.º 6
0
        /// <summary>
        ///     Computes the zero-based offset of an atom's residue sequence number from the lowest residue sequence
        ///     number of a protein interface.
        /// </summary>
        /// <param name="singularAaToAaInteractions">Container providing the atom list of each chain.</param>
        /// <param name="proteinInterfacesClusteringResult">Clustering result holding the interfaces of each chain.</param>
        /// <param name="detectedBestStages">Per-chain index of the clustering stage whose clusters are used.</param>
        /// <param name="chainIndex">Index of the chain to look at.</param>
        /// <param name="proteinInterfaceIndex">Index of the interface, counting only clusters that contain atoms.</param>
        /// <param name="atomPositionToFind">Atom whose position within the interface is wanted.</param>
        /// <returns>
        ///     The atom's residue sequence number minus the interface's minimum residue sequence number, or -1 when the
        ///     atom's residue sequence number cannot be parsed.
        /// </returns>
        /// <exception cref="ArgumentOutOfRangeException">
        ///     <paramref name="proteinInterfaceIndex" /> is beyond the last non-empty interface.
        /// </exception>
        public static int AtomIndexPositionInProteinInterface(
            ProteinChainListContainer singularAaToAaInteractions,
            ClusteringFullResultListContainer proteinInterfacesClusteringResult,
            int[] detectedBestStages,
            int chainIndex,
            int proteinInterfaceIndex,
            ATOM_Record atomPositionToFind)
        {
            var allClusters = proteinInterfacesClusteringResult.ChainList[chainIndex].StageList[detectedBestStages[chainIndex]].ClusterList;

            // Interface indexes count only clusters that actually contain atoms.
            var populatedClusters = allClusters.Where(cluster => cluster?.AtomIndexList?.Count > 0).ToList();

            if (proteinInterfaceIndex >= populatedClusters.Count)
            {
                throw new ArgumentOutOfRangeException(nameof(proteinInterfaceIndex), proteinInterfaceIndex, "proteinInterfaceIndex was greater than the number of proteinInterfaces found.");
            }

            var selectedInterface = populatedClusters[proteinInterfaceIndex];

            if (selectedInterface.AtomIndexList.Count == 0)
            {
                return -1;
            }

            MinMax residueRange = MinMaxResidueSequenceIndex(selectedInterface, singularAaToAaInteractions, chainIndex);

            // The length is not used for the result; the call is kept so the sequence of calls is unchanged.
            int interfaceLength = CalculateProteinInterfaceLength(residueRange.Min, residueRange.Max);

            var atomResidueNumber = ProteinDataBankFileOperations.NullableTryParseInt32(atomPositionToFind.resSeq.FieldValue);

            if (atomResidueNumber == null)
            {
                return -1;
            }

            // Zero-based offset from the first residue of the interface.
            return atomResidueNumber.Value - residueRange.Min;
        }
Exemplo n.º 7
0
        /// <summary>
        ///     Runs the interaction search for every PDB file on background tasks and collects the formatted output of
        ///     each file.
        /// </summary>
        /// <param name="cancellationToken">Checked at the start of each task and passed on to the interaction search.</param>
        /// <param name="maxAtomInterationDistance">Distance limit forwarded to <c>SearchInteractions.FindInteractions</c>.</param>
        /// <param name="pdbFilesList">PDB filenames to process.</param>
        /// <param name="pdbIdChainIdList">Forwarded unchanged to the interaction search.</param>
        /// <param name="progressActionSet">Receives the start, per-file progress, optional output and finish notifications.</param>
        /// <param name="outputToGui">
        ///     When true, each file's formatted output is also reported through <paramref name="progressActionSet" />.
        /// </param>
        /// <returns>
        ///     The non-null results of the tasks that ran to completion, in the order of <paramref name="pdbFilesList" />.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="pdbFilesList" /> is null.</exception>
        public static string[] MakeInteractionsOutput(CancellationToken cancellationToken, decimal maxAtomInterationDistance, string[] pdbFilesList, Dictionary <string, List <string> > pdbIdChainIdList, ProgressActionSet progressActionSet, bool outputToGui)
        {
            if (pdbFilesList == null)
            {
                throw new ArgumentNullException(nameof(pdbFilesList));
            }

            var interactionTasks = new List <Task <string> >();

            // Upper bound on tasks that are queued or running at the same time.
            int maxPendingTasks = Environment.ProcessorCount * 10;

            ProgressActionSet.StartAction(pdbFilesList.Length, progressActionSet);

            foreach (string pdbFilename in pdbFilesList)
            {
                string _pdbFilename = pdbFilename;

                // Throttle: block until the number of unfinished tasks drops below the limit. Only unfinished tasks
                // are handed to WaitAny; including finished ones makes it return immediately and the loop spin.
                while (true)
                {
                    Task[] pendingTasks = interactionTasks.Where(t => !t.IsCompleted).ToArray <Task>();

                    if (pendingTasks.Length < maxPendingTasks)
                    {
                        break;
                    }

                    Task.WaitAny(pendingTasks);
                }

                var interactionTask = Task.Run(() =>
                {
                    if (cancellationToken.IsCancellationRequested)
                    {
                        return(null);
                    }

                    List <AtomPair> interactionsList = SearchInteractions.FindInteractions(cancellationToken, maxAtomInterationDistance, _pdbFilename, pdbIdChainIdList);

                    string pdbId = ProteinDataBankFileOperations.PdbIdFromPdbFilename(_pdbFilename);
                    if (string.IsNullOrEmpty(pdbId))
                    {
                        throw new ArgumentNullException(nameof(pdbFilesList), "The file " + _pdbFilename + " has an invalid name.");
                    }

                    if (interactionsList == null)
                    {
                        interactionsList = new List <AtomPair>();
                    }

                    interactionsList = interactionsList.OrderBy(o => o.Distance).ToList();

                    var interactionsString = FormatInteractionOutput(pdbId, interactionsList);

                    if (outputToGui)
                    {
                        ProgressActionSet.Report(interactionsString, progressActionSet);
                    }

                    ProgressActionSet.ProgressAction(1, progressActionSet);

                    return(interactionsString);
                }, cancellationToken);

                interactionTasks.Add(interactionTask);
            }

            // NOTE(review): a task that faults or is cancelled while still pending here makes WaitAll throw an
            // AggregateException, whereas one that already finished that way is silently dropped below - confirm
            // which of the two behaviours callers expect.
            Task.WaitAll(interactionTasks.Where(t => !t.IsCompleted).ToArray <Task>());

            // The list is already in input order. Task.Id is assigned lazily on first access, so sorting by it does
            // not reliably reflect creation order and is not used.
            var interactionsStringsList = interactionTasks.Where(t => t.IsCompleted && !t.IsCanceled && !t.IsFaulted && t.Result != null).Select(t => t.Result).ToArray();

            ProgressActionSet.FinishAction(true, progressActionSet);

            return(interactionsStringsList);
        }
        /// <summary>
        ///     Builds a two-dimensional spreadsheet: the header row first, then one row per record, sorted in descending
        ///     order by the parsed <c>TotalFound</c> value.
        /// </summary>
        /// <param name="proproteinInterfaceSpreadsheetRecordList">Records to convert into rows.</param>
        /// <returns>The header and record rows as a rectangular string array.</returns>
        /// <exception cref="ArgumentNullException">
        ///     <paramref name="proproteinInterfaceSpreadsheetRecordList" /> is null.
        /// </exception>
        public static string[,] MotifSpreadsheet(List <ProproteinInterfaceSpreadsheetRecord> proproteinInterfaceSpreadsheetRecordList)
        {
            if (proproteinInterfaceSpreadsheetRecordList == null)
            {
                throw new ArgumentNullException(nameof(proproteinInterfaceSpreadsheetRecordList));
            }

            var rows = new List<string[]>
            {
                Header().ToStrings()
            };

            var byTotalFoundDescending = proproteinInterfaceSpreadsheetRecordList.OrderByDescending(entry => ProteinDataBankFileOperations.NullableTryParseInt32(entry.TotalFound));

            foreach (var entry in byTotalFoundDescending)
            {
                rows.Add(entry.ToStrings());
            }

            return ConvertTypes.StringJagged2DArrayTo2DArray(rows.ToArray());
        }
        /// <summary>
        ///     Creates one spreadsheet record per motif. For each motif the three target databases are queried on
        ///     parallel tasks, and the match and sequence counts of each response are combined with the motif's
        ///     suggestion counters.
        /// </summary>
        /// <param name="motifDistinctWithCount">Motifs (keys) with their suggestion counters (values).</param>
        /// <returns>
        ///     One record per dictionary entry. A count of -1 marks a value that was missing or could not be parsed; an
        ///     overall total is -1 only when all three of its parts are -1.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="motifDistinctWithCount" /> is null.</exception>
        public static List <MotifHitSpreadsheetRecord> MotifRecordList(Dictionary <string, MotifCounter> motifDistinctWithCount)
        {
            if (motifDistinctWithCount == null)
            {
                throw new ArgumentNullException(nameof(motifDistinctWithCount));
            }

            // Counts below zero contribute nothing to an overall total.
            Func<int, int> orZero = count => count > -1 ? count : 0;

            var records = new List<MotifHitSpreadsheetRecord>();

            foreach (var entry in motifDistinctWithCount)
            {
                var motif   = entry.Key;
                var counter = entry.Value;

                ProproteinInterfaceMatchSet pdbResponse       = null;
                ProproteinInterfaceMatchSet swissProtResponse = null;
                ProproteinInterfaceMatchSet trEmblResponse    = null;

                // One lookup per target database, run concurrently.
                Task[] lookups =
                {
                    Task.Run(() => pdbResponse = ProproteinInterfaceServiceClient.LoadProproteinInterfaceResponse(new ScanProproteinInterfaceParameters()
                    {
                        sig = motif, db = ScanProproteinInterfaceParameters.TargetProteinDatabases.ProteinDataBank
                    })),
                    Task.Run(() => swissProtResponse = ProproteinInterfaceServiceClient.LoadProproteinInterfaceResponse(new ScanProproteinInterfaceParameters()
                    {
                        sig = motif, db = ScanProproteinInterfaceParameters.TargetProteinDatabases.UniProtKbSwissProt
                    })),
                    Task.Run(() => trEmblResponse = ProproteinInterfaceServiceClient.LoadProproteinInterfaceResponse(new ScanProproteinInterfaceParameters()
                    {
                        sig = motif, db = ScanProproteinInterfaceParameters.TargetProteinDatabases.UniProtKbTrEmbl
                    })),
                };

                Task.WaitAll(lookups.Where(t => !t.IsCompleted && !t.IsCanceled && !t.IsFaulted).ToArray());

                // A missing response or an unparsable count is recorded as -1.
                int pdbHits       = (pdbResponse == null ? null : ProteinDataBankFileOperations.NullableTryParseInt32(pdbResponse.NMatch)) ?? -1;
                int swissProtHits = (swissProtResponse == null ? null : ProteinDataBankFileOperations.NullableTryParseInt32(swissProtResponse.NMatch)) ?? -1;
                int trEmblHits    = (trEmblResponse == null ? null : ProteinDataBankFileOperations.NullableTryParseInt32(trEmblResponse.NMatch)) ?? -1;

                int overallHits = pdbHits == -1 && swissProtHits == -1 && trEmblHits == -1
                    ? -1
                    : orZero(pdbHits) + orZero(swissProtHits) + orZero(trEmblHits);

                int pdbSequences       = (pdbResponse == null ? null : ProteinDataBankFileOperations.NullableTryParseInt32(pdbResponse.NSeq)) ?? -1;
                int swissProtSequences = (swissProtResponse == null ? null : ProteinDataBankFileOperations.NullableTryParseInt32(swissProtResponse.NSeq)) ?? -1;
                int trEmblSequences    = (trEmblResponse == null ? null : ProteinDataBankFileOperations.NullableTryParseInt32(trEmblResponse.NSeq)) ?? -1;

                int overallSequences = pdbSequences == -1 && swissProtSequences == -1 && trEmblSequences == -1
                    ? -1
                    : orZero(pdbSequences) + orZero(swissProtSequences) + orZero(trEmblSequences);

                var overallTimesSuggested = (counter.TotalFwd + counter.TotalRev + counter.TotalMix);

                records.Add(new MotifHitSpreadsheetRecord()
                {
                    Motif = motif,

                    TotalTimesSuggestedFwd     = "" + counter.TotalFwd,
                    TotalTimesSuggestedRev     = "" + counter.TotalRev,
                    TotalTimesSuggestedMix     = "" + counter.TotalMix,
                    TotalTimesSuggestedOverall = "" + overallTimesSuggested,

                    TotalDatabaseHitsPdb                     = "" + pdbHits,
                    TotalDatabaseSequencesPdb                = "" + pdbSequences,
                    TotalDatabaseHitsUniProtKbSwissProt      = "" + swissProtHits,
                    TotalDatabaseSequencesUniProtKbSwissProt = "" + swissProtSequences,
                    TotalDatabaseHitsUniProtKbTrEmbl         = "" + trEmblHits,
                    TotalDatabaseSequencesUniProtKbTrEmbl    = "" + trEmblSequences,

                    TotalDatabaseHitsOverall = "" + overallHits,
                    TotalSequencesOverall    = "" + overallSequences,
                });
            }

            return records;
        }
Exemplo n.º 10
0
        /// <summary>
        ///     Builds a string with one secondary-structure character per residue position of a residue range, read
        ///     from the DSSP file stored next to the PDB file (<paramref name="pdbFilename" /> + ".dssp").
        /// </summary>
        /// <param name="pdbFilename">PDB filename; the DSSP filename is derived by appending ".dssp".</param>
        /// <param name="chainId">Chain to restrict the records to, or <c>null</c> to use records of every chain.</param>
        /// <param name="startResidueSequenceIndex">
        ///     First residue sequence number of the range; -1 selects the lowest parsable number among the selected records.
        /// </param>
        /// <param name="endResidueSequenceIndex">
        ///     Last residue sequence number of the range; -1 selects the highest parsable number among the selected records.
        /// </param>
        /// <param name="reversedSequence">When true, the resulting characters are returned in reverse order.</param>
        /// <returns>
        ///     The secondary-structure string, with '_' at positions for which no record supplied a character. An empty
        ///     string is returned when the filename is blank, the DSSP file does not exist, the requested chain has no
        ///     records, or a default range bound is needed but no record has a usable residue sequence number.
        /// </returns>
        public static string ProteinInterfaceSecondaryStructure(string pdbFilename, string chainId = null, int startResidueSequenceIndex = -1, int endResidueSequenceIndex = -1, bool reversedSequence = false)
        {
            if (string.IsNullOrWhiteSpace(pdbFilename))
            {
                return("");
            }

            var dsspFilename = pdbFilename + ".dssp";

            if (!File.Exists(dsspFilename))
            {
                return("");
            }

            var secondaryStructure = DsspFormatFile.LoadDsspFile(dsspFilename);

            if (chainId != null && secondaryStructure.FirstOrDefault(a => a.FieldChain.FieldValue == chainId) == null)
            {
                return("");
            }

            if (startResidueSequenceIndex == -1 || endResidueSequenceIndex == -1)
            {
                // Default bounds come from the records themselves. Blank or non-numeric residue numbers are skipped
                // (the same tolerant parsing used when filling the result below) instead of aborting with a
                // FormatException.
                var parsedResidueIndexes = secondaryStructure
                    .Where(a => chainId == null || a.FieldChain.FieldValue == chainId)
                    .Where(a => !string.IsNullOrWhiteSpace(a.FieldPdbResidueSequenceIndex.FieldValue))
                    .Select(a => ProteinDataBankFileOperations.NullableTryParseInt32(a.FieldPdbResidueSequenceIndex.FieldValue))
                    .Where(value => value != null)
                    .Select(value => value.Value)
                    .ToList();

                // Without any usable residue number there is no range to describe.
                if (parsedResidueIndexes.Count == 0)
                {
                    return("");
                }

                if (startResidueSequenceIndex == -1)
                {
                    startResidueSequenceIndex = parsedResidueIndexes.Min();
                }

                if (endResidueSequenceIndex == -1)
                {
                    endResidueSequenceIndex = parsedResidueIndexes.Max();
                }
            }

            // dssp specification says order may not be correct
            secondaryStructure = secondaryStructure.Where(a => !string.IsNullOrWhiteSpace(a.FieldChain.FieldValue) && !string.IsNullOrWhiteSpace(a.FieldPdbResidueSequenceIndex.FieldValue)).OrderBy(a => a.FieldChain.FieldValue).ThenBy(a => ProteinDataBankFileOperations.NullableTryParseInt32(a.FieldPdbResidueSequenceIndex.FieldValue)).ToList();

            var proteinInterfaceLen = ProteinInterfaceDetection.CalculateProteinInterfaceLength(startResidueSequenceIndex, endResidueSequenceIndex);

            // Start with every position marked as "no data".
            char[] result = new char[proteinInterfaceLen];
            for (int index = 0; index < result.Length; index++)
            {
                result[index] = '_';
            }

            foreach (var record in secondaryStructure.Where(a => chainId == null || a.FieldChain.FieldValue == chainId))
            {
                var resSeq = ProteinDataBankFileOperations.NullableTryParseInt32(record.FieldPdbResidueSequenceIndex.FieldValue);

                if (resSeq == null || resSeq < startResidueSequenceIndex || resSeq > endResidueSequenceIndex)
                {
                    continue;
                }

                if (record.FieldSecondaryStructure.FieldValue.Length == 0)
                {
                    continue;
                }

                // Position of this residue relative to the start of the range.
                int position = resSeq.Value - startResidueSequenceIndex;

                result[position] = record.FieldSecondaryStructure.FieldValue[0];
            }

            if (reversedSequence)
            {
                Array.Reverse(result);
            }

            return(new string(result));
        }
Exemplo n.º 11
0
        /// <summary>
        ///     This method finds interactions between detected proteinInterfaces.  It is specific to dimers with exactly two chains.  [Chain A
        ///     ProteinInterface Index, Chain B ProteinInterface Index]
        /// </summary>
        /// <param name="cancellationToken"></param>
        /// <param name="pdbFilename"></param>
        /// <param name="pdbFileChains"></param>
        /// <param name="chainInteractingAtomLists"></param>
        /// <param name="fullClusteringResult"></param>
        /// <param name="proteinInterfacesClusteringResult"></param>
        /// <param name="detectedFinalStageIndexes"></param>
        /// <param name="pdbIdChainIdList"></param>
        /// <returns></returns>
        public static InteractionBetweenProteinInterfacesListContainer FindInteractionsBetweenAnyProteinInterfaces(
            CancellationToken cancellationToken,
            decimal maxAtomInterationDistance,
            string pdbFilename,
            Dictionary<string, List<string>> pdbIdChainIdList,
            ProteinChainListContainer pdbFileChains,
            ProteinChainListContainer chainInteractingAtomLists,
            ClusteringFullResultListContainer fullClusteringResult,
            ClusteringFullResultListContainer proteinInterfacesClusteringResult,
            int[] detectedFinalStageIndexes)
        {
            if (string.IsNullOrWhiteSpace(pdbFilename))
            {
                throw new ArgumentOutOfRangeException(nameof(pdbFilename));
            }

            if (!File.Exists(pdbFilename))
            {
                throw new FileNotFoundException("File not found", pdbFilename);
            }

            if (ParameterValidation.IsProteinChainListContainerNullOrEmpty(chainInteractingAtomLists))
            {
                throw new ArgumentOutOfRangeException(nameof(chainInteractingAtomLists));
            }

            if (ParameterValidation.IsClusteringFullResultListContainerNullOrEmpty(fullClusteringResult))
            {
                throw new ArgumentOutOfRangeException(nameof(fullClusteringResult));
            }

            if (ParameterValidation.IsClusteringFullResultListContainerNullOrEmpty(proteinInterfacesClusteringResult))
            {
                throw new ArgumentOutOfRangeException(nameof(proteinInterfacesClusteringResult));
            }

            if (ParameterValidation.IsIntArrayNullOrEmpty(detectedFinalStageIndexes))
            {
                throw new ArgumentOutOfRangeException(nameof(detectedFinalStageIndexes));
            }

            string proteinId = ProteinDataBankFileOperations.PdbIdFromPdbFilename(pdbFilename);

            var interactionBetweenProteinInterfacesListContainer = new InteractionBetweenProteinInterfacesListContainer();

            List<AtomPair> interactionList;

            if (pdbFileChains != null && pdbFileChains.ChainList != null && pdbFileChains.ChainList.Count > 0)
            {
                interactionList = SearchInteractions.FindInteractions(cancellationToken, maxAtomInterationDistance, proteinId, pdbIdChainIdList, pdbFileChains); //, false, -1, pdbFileChains);
            }
            else
            {
                interactionList = SearchInteractions.FindInteractions(cancellationToken, maxAtomInterationDistance, pdbFilename, pdbIdChainIdList);
            }

            var interactionInsideProteinInterfaceArray = new bool[interactionList.Count];

            ////////Console.WriteLine("");
            ////////Console.WriteLine("");
            ////////Console.WriteLine("------------------ START ------------------");
            //int c = 0;

            for (int chainIndexA = 0; chainIndexA < proteinInterfacesClusteringResult.ChainList.Count; chainIndexA++)
            {
                for (int chainIndexB = 0; chainIndexB < proteinInterfacesClusteringResult.ChainList.Count; chainIndexB++)
                {
                    if (chainIndexA == chainIndexB || chainIndexB < chainIndexA)
                    {
                        continue;
                    }

                    List<ClusteringFullResultListContainer.Chain.Stage.Cluster> proteinInterfaceListA = proteinInterfacesClusteringResult.ChainList[chainIndexA].StageList[detectedFinalStageIndexes[chainIndexA]].ClusterList;
                    List<ClusteringFullResultListContainer.Chain.Stage.Cluster> proteinInterfaceListB = proteinInterfacesClusteringResult.ChainList[chainIndexB].StageList[detectedFinalStageIndexes[chainIndexB]].ClusterList;

                    int realProteinInterfaceIndexA = -1;

                    for (int proteinInterfaceIndexA = 0; proteinInterfaceIndexA < proteinInterfaceListA.Count; proteinInterfaceIndexA++)
                    {
                        int realProteinInterfaceIndexB = -1;
                        List<int> proteinInterfaceMemberIndexListA = proteinInterfaceListA[proteinInterfaceIndexA].AtomIndexList;
                        List<ATOM_Record> proteinInterfaceAtomListA = proteinInterfaceMemberIndexListA.Select(proteinInterfaceMemberIndexA => chainInteractingAtomLists.ChainList[chainIndexA].AtomList[proteinInterfaceMemberIndexA]).ToList();
                        proteinInterfaceAtomListA = proteinInterfaceAtomListA.OrderBy(a => ProteinDataBankFileOperations.NullableTryParseInt32(a.resSeq.FieldValue)).ToList();
                        if (proteinInterfaceAtomListA.Count > 0)
                        {
                            realProteinInterfaceIndexA++;
                        }
                        else
                        {
                            continue;
                        }

                        for (int proteinInterfaceIndexB = 0; proteinInterfaceIndexB < proteinInterfaceListB.Count; proteinInterfaceIndexB++)
                        {
                            List<int> proteinInterfaceMemberIndexListB = proteinInterfaceListB[proteinInterfaceIndexB].AtomIndexList;
                            List<ATOM_Record> proteinInterfaceAtomListB = proteinInterfaceMemberIndexListB.Select(proteinInterfaceMemberIndexB => chainInteractingAtomLists.ChainList[chainIndexB].AtomList[proteinInterfaceMemberIndexB]).ToList();
                            proteinInterfaceAtomListB = proteinInterfaceAtomListB.OrderBy(b => ProteinDataBankFileOperations.NullableTryParseInt32(b.resSeq.FieldValue)).ToList();
                            if (proteinInterfaceAtomListB.Count > 0)
                            {
                                realProteinInterfaceIndexB++;
                            }
                            else
                            {
                                continue;
                            }

                            for (int proteinInterfaceAtomListIndexA = 0; proteinInterfaceAtomListIndexA < proteinInterfaceAtomListA.Count; proteinInterfaceAtomListIndexA++)
                            {
                                ATOM_Record atomA = proteinInterfaceAtomListA[proteinInterfaceAtomListIndexA];

                                for (int proteinInterfaceAtomListIndexB = 0; proteinInterfaceAtomListIndexB < proteinInterfaceAtomListB.Count; proteinInterfaceAtomListIndexB++)
                                {
                                    ATOM_Record atomB = proteinInterfaceAtomListB[proteinInterfaceAtomListIndexB];

                                    //c++;
                                    ////////Console.WriteLine(c.ToString().PadLeft(5) +
                                    //                  " Chain " + chainIndexA + " (" + proteinInterfaceListA.Count(a => a.AtomIndexList.Count > 0) + " proteinInterfaces) ProteinInterface " + realProteinInterfaceIndexA + " (" + proteinInterfaceAtomListA.Count + " atoms) <--->" +
                                    //                  " Chain " + chainIndexB + " (" + proteinInterfaceListB.Count(a => a.AtomIndexList.Count > 0) + " proteinInterfaces) ProteinInterface " + realProteinInterfaceIndexB + " (" + proteinInterfaceAtomListB.Count + " atoms)  --->" +
                                    //                  " chainID " + atomA.chainID.FieldValue + " resName " + atomA.resName.FieldValue + " resSeq " + atomA.resSeq.FieldValue + " <--->" +
                                    //                  " chainID " + atomB.chainID.FieldValue + " resName " + atomB.resName.FieldValue + " resSeq " + atomB.resSeq.FieldValue);

                                    for (int interactionIndex = 0; interactionIndex < interactionList.Count; interactionIndex++)
                                    {
                                        AtomPair interaction = interactionList[interactionIndex];

                                        if ((interaction.Atom1 == atomA && interaction.Atom2 == atomB) || (interaction.Atom1 == atomB && interaction.Atom2 == atomA))
                                        {
                                            interactionInsideProteinInterfaceArray[interactionIndex] = true;

                                            var interactionBetweenProteinInterfaces = new InteractionBetweenProteinInterfaces();
                                            interactionBetweenProteinInterfacesListContainer.InteractionBetweenProteinInterfacesList.Add(interactionBetweenProteinInterfaces);

                                            interactionBetweenProteinInterfaces.Atom1.Atom = atomA;
                                            interactionBetweenProteinInterfaces.Atom1.FullProteinInterfaceId.ProteinId = proteinId;
                                            interactionBetweenProteinInterfaces.Atom1.FullProteinInterfaceId.ChainId = chainIndexA;
                                            interactionBetweenProteinInterfaces.Atom1.FullProteinInterfaceId.ProteinInterfaceId = realProteinInterfaceIndexA;

                                            interactionBetweenProteinInterfaces.Atom2.Atom = atomB;
                                            interactionBetweenProteinInterfaces.Atom2.FullProteinInterfaceId.ProteinId = proteinId;
                                            interactionBetweenProteinInterfaces.Atom2.FullProteinInterfaceId.ChainId = chainIndexB;
                                            interactionBetweenProteinInterfaces.Atom2.FullProteinInterfaceId.ProteinInterfaceId = realProteinInterfaceIndexB;
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
            }

            for (int interactionIndex = 0; interactionIndex < interactionInsideProteinInterfaceArray.Length; interactionIndex++)
            {
                bool interactionInsideProteinInterface = interactionInsideProteinInterfaceArray[interactionIndex];

                if (!interactionInsideProteinInterface)
                {
                    var interactionBetweenNonProteinInterfaces = new InteractionBetweenProteinInterfaces();
                    interactionBetweenProteinInterfacesListContainer.InteractionBetweenNonProteinInterfacesList.Add(interactionBetweenNonProteinInterfaces);

                    interactionBetweenNonProteinInterfaces.Atom1.Atom = interactionList[interactionIndex].Atom1;
                    interactionBetweenNonProteinInterfaces.Atom1.FullProteinInterfaceId.ProteinId = proteinId;
                    interactionBetweenNonProteinInterfaces.Atom1.FullProteinInterfaceId.ChainId = interactionList[interactionIndex].Atom1FullProteinInterfaceId.ChainId;
                    interactionBetweenNonProteinInterfaces.Atom1.FullProteinInterfaceId.ProteinInterfaceId = -1;

                    interactionBetweenNonProteinInterfaces.Atom2.Atom = interactionList[interactionIndex].Atom2;
                    interactionBetweenNonProteinInterfaces.Atom2.FullProteinInterfaceId.ProteinId = proteinId;
                    interactionBetweenNonProteinInterfaces.Atom2.FullProteinInterfaceId.ChainId = interactionList[interactionIndex].Atom2FullProteinInterfaceId.ChainId;
                    interactionBetweenNonProteinInterfaces.Atom2.FullProteinInterfaceId.ProteinInterfaceId = -1;
                }
            }

            ////////Console.WriteLine("------------------ END ------------------");

            // ensure sorted order
            interactionBetweenProteinInterfacesListContainer.InteractionBetweenProteinInterfacesList = interactionBetweenProteinInterfacesListContainer.InteractionBetweenProteinInterfacesList
                .OrderBy(a => a.Atom1.FullProteinInterfaceId.ChainId)
                .ThenBy(a => a.Atom1.FullProteinInterfaceId.ProteinInterfaceId)
                .ThenBy(a => ProteinDataBankFileOperations.NullableTryParseInt32(a.Atom1.Atom.resSeq.FieldValue))
                .ThenBy(a => a.Atom2.FullProteinInterfaceId.ChainId)
                .ThenBy(a => a.Atom2.FullProteinInterfaceId.ProteinInterfaceId)
                .ThenBy(a => ProteinDataBankFileOperations.NullableTryParseInt32(a.Atom2.Atom.resSeq.FieldValue))
                .ToList();

            interactionBetweenProteinInterfacesListContainer.InteractionBetweenNonProteinInterfacesList = interactionBetweenProteinInterfacesListContainer.InteractionBetweenNonProteinInterfacesList
                .OrderBy(a => a.Atom1.FullProteinInterfaceId.ChainId)
                .ThenBy(a => a.Atom1.FullProteinInterfaceId.ProteinInterfaceId)
                .ThenBy(a => ProteinDataBankFileOperations.NullableTryParseInt32(a.Atom1.Atom.resSeq.FieldValue))
                .ThenBy(a => a.Atom2.FullProteinInterfaceId.ChainId)
                .ThenBy(a => a.Atom2.FullProteinInterfaceId.ProteinInterfaceId)
                .ThenBy(a => ProteinDataBankFileOperations.NullableTryParseInt32(a.Atom2.Atom.resSeq.FieldValue))
                .ToList();

            // remove duplicates (as the list is sorted, duplicates will always be together in the list)
            for (int index = interactionBetweenProteinInterfacesListContainer.InteractionBetweenProteinInterfacesList.Count - 1; index > 0; index--)
            {
                InteractionBetweenProteinInterfaces lastInteractionBetweenProteinInterfaces = interactionBetweenProteinInterfacesListContainer.InteractionBetweenProteinInterfacesList[index - 1];
                InteractionBetweenProteinInterfaces thisInteractionBetweenProteinInterfaces = interactionBetweenProteinInterfacesListContainer.InteractionBetweenProteinInterfacesList[index];

                if (lastInteractionBetweenProteinInterfaces == null || thisInteractionBetweenProteinInterfaces == null)
                {
                    continue;
                }

                if (thisInteractionBetweenProteinInterfaces.Atom1.FullProteinInterfaceId.ProteinId == lastInteractionBetweenProteinInterfaces.Atom1.FullProteinInterfaceId.ProteinId &&
                    thisInteractionBetweenProteinInterfaces.Atom1.FullProteinInterfaceId.ChainId == lastInteractionBetweenProteinInterfaces.Atom1.FullProteinInterfaceId.ChainId &&
                    thisInteractionBetweenProteinInterfaces.Atom1.FullProteinInterfaceId.ProteinInterfaceId == lastInteractionBetweenProteinInterfaces.Atom1.FullProteinInterfaceId.ProteinInterfaceId &&
                    thisInteractionBetweenProteinInterfaces.Atom1.Atom == lastInteractionBetweenProteinInterfaces.Atom1.Atom &&
                    thisInteractionBetweenProteinInterfaces.Atom2.FullProteinInterfaceId.ProteinId == lastInteractionBetweenProteinInterfaces.Atom2.FullProteinInterfaceId.ProteinId &&
                    thisInteractionBetweenProteinInterfaces.Atom2.FullProteinInterfaceId.ChainId == lastInteractionBetweenProteinInterfaces.Atom2.FullProteinInterfaceId.ChainId &&
                    thisInteractionBetweenProteinInterfaces.Atom2.FullProteinInterfaceId.ProteinInterfaceId == lastInteractionBetweenProteinInterfaces.Atom2.FullProteinInterfaceId.ProteinInterfaceId &&
                    thisInteractionBetweenProteinInterfaces.Atom2.Atom == lastInteractionBetweenProteinInterfaces.Atom2.Atom)
                {
                    interactionBetweenProteinInterfacesListContainer.InteractionBetweenProteinInterfacesList.RemoveAt(index - 1);
                    //////Console.WriteLine("removed duplicate");
                }
            }

            for (int index = interactionBetweenProteinInterfacesListContainer.InteractionBetweenNonProteinInterfacesList.Count - 1; index > 0; index--)
            {
                InteractionBetweenProteinInterfaces lastInteractionBetweenProteinInterfaces = interactionBetweenProteinInterfacesListContainer.InteractionBetweenNonProteinInterfacesList[index - 1];
                InteractionBetweenProteinInterfaces thisInteractionBetweenProteinInterfaces = interactionBetweenProteinInterfacesListContainer.InteractionBetweenNonProteinInterfacesList[index];

                if (lastInteractionBetweenProteinInterfaces == null || thisInteractionBetweenProteinInterfaces == null)
                {
                    continue;
                }

                if (thisInteractionBetweenProteinInterfaces.Atom1.FullProteinInterfaceId.ProteinId == lastInteractionBetweenProteinInterfaces.Atom1.FullProteinInterfaceId.ProteinId &&
                    thisInteractionBetweenProteinInterfaces.Atom1.FullProteinInterfaceId.ChainId == lastInteractionBetweenProteinInterfaces.Atom1.FullProteinInterfaceId.ChainId &&
                    thisInteractionBetweenProteinInterfaces.Atom1.FullProteinInterfaceId.ProteinInterfaceId == lastInteractionBetweenProteinInterfaces.Atom1.FullProteinInterfaceId.ProteinInterfaceId &&
                    thisInteractionBetweenProteinInterfaces.Atom1.Atom == lastInteractionBetweenProteinInterfaces.Atom1.Atom &&
                    thisInteractionBetweenProteinInterfaces.Atom2.FullProteinInterfaceId.ProteinId == lastInteractionBetweenProteinInterfaces.Atom2.FullProteinInterfaceId.ProteinId &&
                    thisInteractionBetweenProteinInterfaces.Atom2.FullProteinInterfaceId.ChainId == lastInteractionBetweenProteinInterfaces.Atom2.FullProteinInterfaceId.ChainId &&
                    thisInteractionBetweenProteinInterfaces.Atom2.FullProteinInterfaceId.ProteinInterfaceId == lastInteractionBetweenProteinInterfaces.Atom2.FullProteinInterfaceId.ProteinInterfaceId &&
                    thisInteractionBetweenProteinInterfaces.Atom2.Atom == lastInteractionBetweenProteinInterfaces.Atom2.Atom)
                {
                    interactionBetweenProteinInterfacesListContainer.InteractionBetweenNonProteinInterfacesList.RemoveAt(index - 1);
                    //////Console.WriteLine("removed duplicate");
                }
            }

            return interactionBetweenProteinInterfacesListContainer;
        }
Exemplo n.º 12
0
        /// <summary>
        ///     Builds sequence and position data for every non-empty proteinInterface of every chain.
        ///     The detected proteinInterfaces may be missing data such as other atoms or residues which are also in the
        ///     proteinInterface but were not directly interacting, so every residue sequence index between the
        ///     proteinInterface's minimum and maximum is looked up: first in <paramref name="pdbFileChains" /> and, when
        ///     not found there, in the chains loaded from <paramref name="pdbFilename" />.
        ///     For each residue the 1-letter and 3-letter sequences (all residues, all interactions, interactions inside
        ///     proteinInterfaces only, interactions outside proteinInterfaces only, no interactions) are extended with
        ///     either the residue code or a placeholder ("_" / "___").
        /// </summary>
        /// <param name="pdbFilename">
        ///     Path of an existing PDB file. The protein id is derived from it, and the file is read (at most once per
        ///     call) when a residue cannot be found in <paramref name="pdbFileChains" />.
        /// </param>
        /// <param name="pdbIdChainIdList">
        ///     Optional map from protein id to chain ids. May be null. When it holds an entry for the protein, those
        ///     chain ids are passed to the fallback load of the PDB file.
        /// </param>
        /// <param name="pdbFileChains">
        ///     Chains searched first for each residue sequence index; also used to compute each residue's
        ///     <c>ArrayMemberIndex</c>.
        /// </param>
        /// <param name="singularAaToAaInteractions">
        ///     Interaction atoms per chain, used to find the min/max residue sequence index of each proteinInterface and
        ///     to build the per-residue interaction vector.
        /// </param>
        /// <param name="proteinInterfacesClusteringResult">
        ///     Clustering result; for each chain the cluster list of the stage selected by
        ///     <paramref name="detectedBestStages" /> is used. Clusters with a null or empty atom index list are skipped.
        /// </param>
        /// <param name="detectedBestStages">Stage index to use for each chain; needs one entry per chain.</param>
        /// <param name="interactionBetweenProteinInterfacesContainer">
        ///     Interactions used to decide whether a residue interacts with another proteinInterface, with a
        ///     non-proteinInterface, or not at all.
        /// </param>
        /// <returns>One entry per non-empty proteinInterface, in chain order and then proteinInterface order.</returns>
        /// <exception cref="ArgumentOutOfRangeException">
        ///     <paramref name="pdbFilename" /> is blank, one of the validated containers fails its null-or-empty check, or
        ///     <paramref name="detectedBestStages" /> has fewer entries than there are chains.
        /// </exception>
        /// <exception cref="FileNotFoundException"><paramref name="pdbFilename" /> does not exist.</exception>
        public static List<ProteinInterfaceSequenceAndPositionData> AnalyseProteinInterfacesSequenceAndPositionData(
            string pdbFilename,
            Dictionary<string, List<string>> pdbIdChainIdList,
            ProteinChainListContainer pdbFileChains,
            ProteinChainListContainer singularAaToAaInteractions,
            ClusteringFullResultListContainer proteinInterfacesClusteringResult,
            int[] detectedBestStages,
            InteractionBetweenProteinInterfacesListContainer interactionBetweenProteinInterfacesContainer)
        {
            if (string.IsNullOrWhiteSpace(pdbFilename))
            {
                throw new ArgumentOutOfRangeException(nameof(pdbFilename));
            }

            if (!File.Exists(pdbFilename))
            {
                throw new FileNotFoundException("File not found", pdbFilename);
            }

            if (ParameterValidation.IsProteinChainListContainerNullOrEmpty(singularAaToAaInteractions))
            {
                throw new ArgumentOutOfRangeException(nameof(singularAaToAaInteractions));
            }

            if (ParameterValidation.IsClusteringFullResultListContainerNullOrEmpty(proteinInterfacesClusteringResult))
            {
                throw new ArgumentOutOfRangeException(nameof(proteinInterfacesClusteringResult));
            }

            if (ParameterValidation.IsIntArrayNullOrEmpty(detectedBestStages))
            {
                throw new ArgumentOutOfRangeException(nameof(detectedBestStages));
            }

            // ProteinInterfaces are clusters with non-proteinInterfaces removed.

            var result = new List<ProteinInterfaceSequenceAndPositionData>();
            string proteinId = ProteinDataBankFileOperations.PdbIdFromPdbFilename(pdbFilename);
            int totalChains = proteinInterfacesClusteringResult.ChainList.Count;

            // Every chain reads detectedBestStages[chainIndex], so fail fast instead of part-way through the loop.
            if (detectedBestStages.Length < totalChains)
            {
                throw new ArgumentOutOfRangeException(nameof(detectedBestStages));
            }

            const string placeholder1L = "_";
            const string placeholder3L = "___";

            // Chains loaded from the PDB file for residues missing from pdbFileChains. Loaded lazily and only once per
            // call: parsing the file again for every missing residue is loop-invariant work.
            ProteinChainListContainer fallbackPdbFileChains = null;
            bool fallbackPdbFileChainsLoaded = false;

            for (int chainIndex = 0; chainIndex < totalChains; chainIndex++)
            {
                int stageIndex = detectedBestStages[chainIndex];
                string chainIdLetter = SpreadsheetFileHandler.AlphabetLetterRollOver(chainIndex);

                List<ClusteringFullResultListContainer.Chain.Stage.Cluster> nonEmptyProteinInterfaceList = proteinInterfacesClusteringResult.ChainList[chainIndex].StageList[stageIndex].ClusterList
                    .Where(a => a != null && a.AtomIndexList != null && a.AtomIndexList.Count > 0)
                    .ToList();

                // proteinInterfaceIndex counts non-empty proteinInterfaces only
                for (int proteinInterfaceIndex = 0; proteinInterfaceIndex < nonEmptyProteinInterfaceList.Count; proteinInterfaceIndex++)
                {
                    ClusteringFullResultListContainer.Chain.Stage.Cluster proteinInterface = nonEmptyProteinInterfaceList[proteinInterfaceIndex];

                    // Find min and max residue sequence index value in the proteinInterface
                    MinMax residueRange = MinMaxResidueSequenceIndex(proteinInterface, singularAaToAaInteractions, chainIndex);
                    int proteinInterfaceLength = CalculateProteinInterfaceLength(residueRange.Min, residueRange.Max);

                    var positionData = new ProteinInterfaceSequenceAndPositionData
                    {
                        FullProteinInterfaceId = new FullProteinInterfaceId(proteinId, chainIndex, proteinInterfaceIndex, residueRange.Min, residueRange.Max),
                        ChainIdLetter = chainIdLetter,
                        ProteinInterfaceIdLetter = SpreadsheetFileHandler.AlphabetLetterRollOver(proteinInterfaceIndex),
                        StartPosition = residueRange.Min,
                        EndPosition = residueRange.Max,
                        ProteinInterfaceLength = proteinInterfaceLength
                    };
                    positionData.AminoAcidSequenceAllResidueSequenceIndexes = new ProteinInterfaceAminoAcidMetaData[positionData.ProteinInterfaceLength];

                    positionData.AminoAcidSequenceAll1L = "";
                    positionData.AminoAcidSequenceInteractionsAll1L = "";
                    positionData.AminoAcidSequenceInteractionsInsideProteinInterfacesOnly1L = "";
                    positionData.AminoAcidSequenceInteractionsNone1L = "";
                    positionData.AminoAcidSequenceInteractionsOutsideProteinInterfacesOnly1L = "";

                    positionData.AminoAcidSequenceAll3L = "";
                    positionData.AminoAcidSequenceInteractionsAll3L = "";
                    positionData.AminoAcidSequenceInteractionsInsideProteinInterfacesOnly3L = "";
                    positionData.AminoAcidSequenceInteractionsNone3L = "";
                    positionData.AminoAcidSequenceInteractionsOutsideProteinInterfacesOnly3L = "";

                    for (int residueSequenceIndex = residueRange.Min; residueSequenceIndex <= residueRange.Max; residueSequenceIndex++)
                    {
                        /* questions
                         * 1. does this residue interact with another residue which is also part of a proteinInterface?
                         * 2. if not, does this residue interact at all?
                         */

                        // The meta data slot is the number of 1-letter codes appended so far.
                        var residueMetaData = new ProteinInterfaceAminoAcidMetaData();
                        positionData.AminoAcidSequenceAllResidueSequenceIndexes[positionData.AminoAcidSequenceAll1L.Length] = residueMetaData;

                        ATOM_Record foundAtom = AtomSearchMethods.FindAtomInsidePdbFileChain(pdbFileChains, chainIndex, residueSequenceIndex);

                        if (foundAtom == null)
                        {
                            // Original author's note: a non-CA atom is loaded here in case of a missing CA atom, to
                            // find the AA code for the resSeq index.
                            if (!fallbackPdbFileChainsLoaded)
                            {
                                string[] chainIdList = null;
                                List<string> chainIdsForProtein;

                                if (pdbIdChainIdList != null && pdbIdChainIdList.TryGetValue(proteinId, out chainIdsForProtein))
                                {
                                    chainIdList = chainIdsForProtein?.ToArray();
                                }

                                fallbackPdbFileChains = ProteinDataBankFileOperations.PdbAtomicChains(pdbFilename, chainIdList, -1, -1, false);
                                fallbackPdbFileChainsLoaded = true;
                            }

                            foundAtom = AtomSearchMethods.FindAtomInsidePdbFileChain(fallbackPdbFileChains, chainIndex, residueSequenceIndex);
                        }

                        residueMetaData.PdbResidueSequenceIndex = residueSequenceIndex;
                        residueMetaData.ArrayMemberIndex = pdbFileChains.ChainList[chainIndex].AtomList.IndexOf(foundAtom);
                        residueMetaData.OppoproteinInterfaceInteractions = new bool[proteinInterfaceLength];

                        if (foundAtom == null)
                        {
                            // Residue is unknown: every sequence gets a placeholder at this position.
                            positionData.AminoAcidSequenceAll1L += placeholder1L;
                            positionData.AminoAcidSequenceInteractionsAll1L += placeholder1L;
                            positionData.AminoAcidSequenceInteractionsInsideProteinInterfacesOnly1L += placeholder1L;
                            positionData.AminoAcidSequenceInteractionsOutsideProteinInterfacesOnly1L += placeholder1L;
                            positionData.AminoAcidSequenceInteractionsNone1L += placeholder1L;

                            positionData.AminoAcidSequenceAll3L += placeholder3L;
                            positionData.AminoAcidSequenceInteractionsAll3L += placeholder3L;
                            positionData.AminoAcidSequenceInteractionsInsideProteinInterfacesOnly3L += placeholder3L;
                            positionData.AminoAcidSequenceInteractionsOutsideProteinInterfacesOnly3L += placeholder3L;
                            positionData.AminoAcidSequenceInteractionsNone3L += placeholder3L;

                            residueMetaData.ProteinInterfaceInteractionType = ProteinInterfaceInteractionType.NoInteractionFound;
                            residueMetaData.NonProteinInterfaceInteractionResidueNames1L += placeholder1L;
                            residueMetaData.NonProteinInterfaceInteractionResidueNames3L += placeholder3L;
                            continue;
                        }

                        // Residue codes are needed several times below; compute them once.
                        var residueCode1L = AminoAcidConversions.AminoAcidNameToCode1L(foundAtom.resName.FieldValue);
                        var residueCode3L = foundAtom.resName.FieldValue.PadRight(3, '_');

                        positionData.AminoAcidSequenceAll1L += residueCode1L;
                        positionData.AminoAcidSequenceAll3L += residueCode3L;

                        List<ATOM_Record> atomsInOtherProteinInterfaces = AtomSearchMethods.FindAtomInteractingWithOtherProteinInterfaces(foundAtom, interactionBetweenProteinInterfacesContainer, FindAtomInteractingWithAnotherProteinInterfaceOptions.FindAtomsInteractingWithOtherProteinInterfaces);
                        List<ATOM_Record> atomsInNonProteinInterfaces = AtomSearchMethods.FindAtomInteractingWithOtherProteinInterfaces(foundAtom, interactionBetweenProteinInterfacesContainer, FindAtomInteractingWithAnotherProteinInterfaceOptions.FindAtomsInteractingWithNonProteinInterfaces);

                        residueMetaData.OppoproteinInterfaceInteractions = AminoAcidInteractionVector(singularAaToAaInteractions, proteinInterfacesClusteringResult, detectedBestStages, interactionBetweenProteinInterfacesContainer, chainIndex, proteinInterfaceIndex, residueSequenceIndex);

                        residueMetaData.ResidueName1L = residueCode1L;
                        residueMetaData.ResidueName3L = residueCode3L;

                        if (atomsInOtherProteinInterfaces != null)
                        {
                            foreach (ATOM_Record atom in atomsInOtherProteinInterfaces)
                            {
                                residueMetaData.ProteinInterfaceInteractionResidueNames1L += AminoAcidConversions.AminoAcidNameToCode1L(atom.resName.FieldValue);
                                residueMetaData.ProteinInterfaceInteractionResidueNames3L += atom.resName.FieldValue.PadRight(3, '_');
                            }
                        }

                        if (atomsInNonProteinInterfaces != null)
                        {
                            foreach (ATOM_Record atom in atomsInNonProteinInterfaces)
                            {
                                residueMetaData.NonProteinInterfaceInteractionResidueNames1L += AminoAcidConversions.AminoAcidNameToCode1L(atom.resName.FieldValue);
                                residueMetaData.NonProteinInterfaceInteractionResidueNames3L += atom.resName.FieldValue.PadRight(3, '_');
                            }
                        }

                        bool interactsWithProteinInterface = atomsInOtherProteinInterfaces != null && atomsInOtherProteinInterfaces.Count > 0;
                        bool interactsWithNonProteinInterface = atomsInNonProteinInterfaces != null && atomsInNonProteinInterfaces.Count > 0;

                        if (interactsWithProteinInterface)
                        {
                            positionData.AminoAcidSequenceInteractionsAll1L += residueCode1L;
                            positionData.AminoAcidSequenceInteractionsInsideProteinInterfacesOnly1L += residueCode1L;
                            positionData.AminoAcidSequenceInteractionsOutsideProteinInterfacesOnly1L += placeholder1L;
                            positionData.AminoAcidSequenceInteractionsNone1L += placeholder1L;

                            positionData.AminoAcidSequenceInteractionsAll3L += residueCode3L;
                            positionData.AminoAcidSequenceInteractionsInsideProteinInterfacesOnly3L += residueCode3L;
                            positionData.AminoAcidSequenceInteractionsOutsideProteinInterfacesOnly3L += placeholder3L;
                            positionData.AminoAcidSequenceInteractionsNone3L += placeholder3L;

                            residueMetaData.ProteinInterfaceInteractionType = ProteinInterfaceInteractionType.InteractionWithAnotherProteinInterface;

                            if (interactsWithNonProteinInterface)
                            {
                                residueMetaData.ProteinInterfaceInteractionType |= ProteinInterfaceInteractionType.InteractionWithNonProteinInterface;
                            }
                        }
                        else if (interactsWithNonProteinInterface)
                        {
                            positionData.AminoAcidSequenceInteractionsAll1L += residueCode1L;
                            positionData.AminoAcidSequenceInteractionsInsideProteinInterfacesOnly1L += placeholder1L;
                            positionData.AminoAcidSequenceInteractionsOutsideProteinInterfacesOnly1L += residueCode1L;
                            positionData.AminoAcidSequenceInteractionsNone1L += placeholder1L;

                            positionData.AminoAcidSequenceInteractionsAll3L += residueCode3L;
                            positionData.AminoAcidSequenceInteractionsInsideProteinInterfacesOnly3L += placeholder3L;
                            positionData.AminoAcidSequenceInteractionsOutsideProteinInterfacesOnly3L += residueCode3L;
                            positionData.AminoAcidSequenceInteractionsNone3L += placeholder3L;

                            residueMetaData.ProteinInterfaceInteractionType = ProteinInterfaceInteractionType.InteractionWithNonProteinInterface;
                        }
                        else
                        {
                            positionData.AminoAcidSequenceInteractionsAll1L += placeholder1L;
                            positionData.AminoAcidSequenceInteractionsInsideProteinInterfacesOnly1L += placeholder1L;
                            positionData.AminoAcidSequenceInteractionsOutsideProteinInterfacesOnly1L += placeholder1L;
                            positionData.AminoAcidSequenceInteractionsNone1L += residueCode1L;

                            positionData.AminoAcidSequenceInteractionsAll3L += placeholder3L;
                            positionData.AminoAcidSequenceInteractionsInsideProteinInterfacesOnly3L += placeholder3L;
                            positionData.AminoAcidSequenceInteractionsOutsideProteinInterfacesOnly3L += placeholder3L;
                            positionData.AminoAcidSequenceInteractionsNone3L += residueCode3L;

                            residueMetaData.ProteinInterfaceInteractionType = ProteinInterfaceInteractionType.NoInteractionFound;
                        }
                    }

                    result.Add(positionData);
                }
            }

            return result;
        }
Exemplo n.º 13
0
        /// <summary>
        ///     Returns a vector showing which positions of opposite proteinInterfaces the given source residue interacts
        ///     with. The vector is as long as the longest non-empty proteinInterface over all chains, so vectors of
        ///     different residues have the same size.
        /// </summary>
        /// <param name="singularAaToAaInteractions">Passed through to the proteinInterface length and position helpers.</param>
        /// <param name="proteinInterfacesClusteringResult">
        ///     Clustering result; for each chain the cluster list of the stage selected by
        ///     <paramref name="detectedBestStages" /> is used.
        /// </param>
        /// <param name="detectedBestStages">Stage index to use for each chain.</param>
        /// <param name="interactionsBetweenProteinInterfacesContainer">
        ///     Container whose <c>InteractionBetweenProteinInterfacesList</c> is scanned for interactions of the source
        ///     residue.
        /// </param>
        /// <param name="sourceChainIndex">Chain id of the source residue.</param>
        /// <param name="sourceProteinInterfaceIndex">ProteinInterface id of the source residue within its chain.</param>
        /// <param name="sourceResidueIndex">
        ///     Residue sequence index of the source residue; compared against the parsed <c>resSeq</c> of the atoms.
        /// </param>
        /// <returns>
        ///     A vector in which an element is true when the source residue interacts with the atom at that position of
        ///     another proteinInterface; all false when no interaction matches.
        /// </returns>
        public static bool[] AminoAcidInteractionVector(
            ProteinChainListContainer singularAaToAaInteractions,
            ClusteringFullResultListContainer proteinInterfacesClusteringResult,
            int[] detectedBestStages,
            InteractionBetweenProteinInterfacesListContainer interactionsBetweenProteinInterfacesContainer,
            int sourceChainIndex,
            int sourceProteinInterfaceIndex,
            int sourceResidueIndex
            )
        {
            // find the largest proteinInterface to make vector the same size
            int maxProteinInterfaceLength = 0;

            for (int chainIndex = 0; chainIndex < proteinInterfacesClusteringResult.ChainList.Count; chainIndex++)
            {
                List<ClusteringFullResultListContainer.Chain.Stage.Cluster> proteinInterfaceList = proteinInterfacesClusteringResult.ChainList[chainIndex].StageList[detectedBestStages[chainIndex]].ClusterList;

                // ProteinInterface indexes count non-empty clusters only; just the count is needed here.
                int nonEmptyProteinInterfaceCount = proteinInterfaceList.Count(a => a != null && a.AtomIndexList != null && a.AtomIndexList.Count > 0);

                for (int proteinInterfaceIndex = 0; proteinInterfaceIndex < nonEmptyProteinInterfaceCount; proteinInterfaceIndex++)
                {
                    int length = FindProteinInterfaceLength(singularAaToAaInteractions, proteinInterfacesClusteringResult, detectedBestStages, chainIndex, proteinInterfaceIndex);

                    if (length > maxProteinInterfaceLength)
                    {
                        maxProteinInterfaceLength = length;
                    }
                }
            }

            // find interactions matching the current chain id and proteinInterface id and res id... res id is different from resSeq in the pdb

            var result = new bool[maxProteinInterfaceLength];

            foreach (InteractionBetweenProteinInterfaces interaction in interactionsBetweenProteinInterfacesContainer.InteractionBetweenProteinInterfacesList)
            {
                bool atom1IsSource = interaction.Atom1.FullProteinInterfaceId.ChainId == sourceChainIndex &&
                                     interaction.Atom1.FullProteinInterfaceId.ProteinInterfaceId == sourceProteinInterfaceIndex &&
                                     ProteinDataBankFileOperations.NullableTryParseInt32(interaction.Atom1.Atom.resSeq.FieldValue) == sourceResidueIndex;

                // Atom2 is only considered when Atom1 is not the source residue.
                bool atom2IsSource = !atom1IsSource &&
                                     interaction.Atom2.FullProteinInterfaceId.ChainId == sourceChainIndex &&
                                     interaction.Atom2.FullProteinInterfaceId.ProteinInterfaceId == sourceProteinInterfaceIndex &&
                                     ProteinDataBankFileOperations.NullableTryParseInt32(interaction.Atom2.Atom.resSeq.FieldValue) == sourceResidueIndex;

                if (!atom1IsSource && !atom2IsSource)
                {
                    continue;
                }

                // where in the opposite proteinInterface is the partner atom?
                var partner = atom1IsSource ? interaction.Atom2 : interaction.Atom1;

                int index = AtomIndexPositionInProteinInterface(singularAaToAaInteractions, proteinInterfacesClusteringResult, detectedBestStages, partner.FullProteinInterfaceId.ChainId, partner.FullProteinInterfaceId.ProteinInterfaceId, partner.Atom);

                // NOTE(review): the position helper is not visible here; a position outside the vector (for example a
                // negative "not found" value) is skipped instead of raising IndexOutOfRangeException - confirm this
                // is the desired handling.
                if (index >= 0 && index < result.Length)
                {
                    result[index] = true;
                }
            }

            return result;
        }
Exemplo n.º 14
0
        /// <summary>
        ///     Runs the protein interface analysis for a single PDB file. It detects the protein interfaces,
        ///     selects a clustering stage index for every chain, records one clustering hierarchy entry per chain,
        ///     finds the interactions between the detected interfaces and collects their sequence and position data.
        /// </summary>
        /// <param name="cancellationToken">Forwarded to <c>CrossProteinInterfaceInteractions.FindInteractionsBetweenAnyProteinInterfaces</c>.</param>
        /// <param name="maxAtomInterationDistance">Forwarded to <c>CrossProteinInterfaceInteractions.FindInteractionsBetweenAnyProteinInterfaces</c>.</param>
        /// <param name="minimumProteinInterfaceDensity">Forwarded to <c>DetectProteinInterfaces</c>.</param>
        /// <param name="pdbFilename">PDB file name; validated with <c>ParameterValidation.IsLoadFilenameInvalid</c> and used to derive the protein id.</param>
        /// <param name="pdbIdChainIdList">Forwarded unchanged to the interaction and sequence/position analysis steps.</param>
        /// <param name="pdbFileChains">Forwarded unchanged to the interaction and sequence/position analysis steps.</param>
        /// <param name="singularAaToAaInteractions">Must not be null or empty; the number of entries in its chain list is the number of chains processed.</param>
        /// <param name="fullClusteringResult">Must not be null or empty; supplies the total stage count reported for each chain.</param>
        /// <returns>
        ///     A <c>ProteinInterfaceAnalysisResultData</c> built from the selected stage indexes, the detected
        ///     interfaces, the per-chain hierarchy entries, the interface interactions and the sequence/position data.
        /// </returns>
        /// <exception cref="ArgumentOutOfRangeException">
        ///     Thrown when <paramref name="pdbFilename" />, <paramref name="singularAaToAaInteractions" /> or
        ///     <paramref name="fullClusteringResult" /> fails its validation check.
        /// </exception>
        public static ProteinInterfaceAnalysisResultData AnalyseProteinInterfaces(
            CancellationToken cancellationToken,
            decimal maxAtomInterationDistance,
            decimal minimumProteinInterfaceDensity,
            string pdbFilename,
            Dictionary<string, List<string>> pdbIdChainIdList,
            ProteinChainListContainer pdbFileChains,
            ProteinChainListContainer singularAaToAaInteractions,
            ClusteringFullResultListContainer fullClusteringResult)
        {
            // Reject unusable arguments before any work is done.
            if (ParameterValidation.IsLoadFilenameInvalid(pdbFilename))
            {
                throw new ArgumentOutOfRangeException(nameof(pdbFilename));
            }

            if (ParameterValidation.IsProteinChainListContainerNullOrEmpty(singularAaToAaInteractions))
            {
                throw new ArgumentOutOfRangeException(nameof(singularAaToAaInteractions));
            }

            if (ParameterValidation.IsClusteringFullResultListContainerNullOrEmpty(fullClusteringResult))
            {
                throw new ArgumentOutOfRangeException(nameof(fullClusteringResult));
            }

            string pdbId = ProteinDataBankFileOperations.PdbIdFromPdbFilename(pdbFilename);

            // Detect the interfaces; the out parameter receives the interface counts that the stage selection below consumes.
            List<List<int>> interfaceCountPerChainStage;
            ClusteringFullResultListContainer detectedInterfaces = DetectProteinInterfaces(
                pdbId,
                singularAaToAaInteractions,
                fullClusteringResult,
                out interfaceCountPerChainStage,
                ClusteringProteinInterfaceDensityDetectionOptions.ResidueSequenceIndex,
                minimumProteinInterfaceDensity);

            // One selected stage index per chain.
            int[] bestStagePerChain = ProteinInterfaceTreeOptimalStageDetection.FindFinalProteinInterfaceStageIndexes(
                singularAaToAaInteractions,
                fullClusteringResult,
                detectedInterfaces,
                interfaceCountPerChainStage);

            int chainCount = singularAaToAaInteractions.ChainList.Count;
            var hierarchyEntries = new List<InteractionProteinInterfaceClusteringHierarchyData>();
            int[] interfacesPerChain = FindNumberProteinInterfacesPerChain(detectedInterfaces, bestStagePerChain);

            for (int chain = 0; chain < chainCount; chain++)
            {
                int bestStage = bestStagePerChain[chain];
                string chainLetter = SpreadsheetFileHandler.AlphabetLetterRollOver(chain);

                // The stage is reported one-based, next to the total number of stages available for the chain.
                hierarchyEntries.Add(new InteractionProteinInterfaceClusteringHierarchyData(
                    pdbId,
                    chainLetter,
                    interfacesPerChain[chain],
                    bestStage + 1,
                    fullClusteringResult.ChainList[chain].StageList.Count));
            }

            InteractionBetweenProteinInterfacesListContainer interfaceInteractions = CrossProteinInterfaceInteractions.FindInteractionsBetweenAnyProteinInterfaces(
                cancellationToken,
                maxAtomInterationDistance,
                pdbFilename,
                pdbIdChainIdList,
                pdbFileChains,
                singularAaToAaInteractions,
                fullClusteringResult,
                detectedInterfaces,
                bestStagePerChain);

            List<ProteinInterfaceSequenceAndPositionData> sequenceAndPositionData = AnalyseProteinInterfacesSequenceAndPositionData(
                pdbFilename,
                pdbIdChainIdList,
                pdbFileChains,
                singularAaToAaInteractions,
                detectedInterfaces,
                bestStagePerChain,
                interfaceInteractions);

            return new ProteinInterfaceAnalysisResultData(
                bestStagePerChain,
                detectedInterfaces,
                hierarchyEntries,
                interfaceInteractions,
                sequenceAndPositionData);
        }
        /// <summary>
        ///     Finds pairs of atoms that belong to two different chains and whose 3D distance is greater than
        ///     zero and no greater than <paramref name="maxAtomInterationDistance" />. Every unordered pair of
        ///     chains is examined once; the atoms of the first chain are split into index ranges that are
        ///     scanned by separate tasks.
        /// </summary>
        /// <param name="cancellationToken">Passed to <c>Task.Run</c> for every worker task. The scanning loops do not poll it themselves.</param>
        /// <param name="maxAtomInterationDistance">Largest distance at which two atoms are still reported as a pair.</param>
        /// <param name="proteinId">Protein id stored on both sides of every reported pair (and used as the cache key if caching is enabled).</param>
        /// <param name="pdbIdChainIdList">Not used by this method.</param>
        /// <param name="proteinFileChains">Chains whose atoms are compared.</param>
        /// <param name="breakWhenFirstInteractionFound">
        ///     When true, the tasks working on a chain pair stop scanning once one of them has found a pair.
        ///     The stop flag is per chain pair, so other chain pairs are still scanned.
        /// </param>
        /// <param name="totalThreads">Forwarded to the <c>WorkDivision</c> constructor.</param>
        /// <param name="sort">When true, the result is ordered by atom 1 resSeq, atom 1 serial, atom 2 resSeq, then atom 2 serial.</param>
        /// <param name="requiredChains">When greater than -1, the exact number of chains that must be present.</param>
        /// <returns>
        ///     null when <paramref name="proteinFileChains" /> or its chain list is null, when the chain count does
        ///     not equal <paramref name="requiredChains" />, or when any chain has no atoms; otherwise the list of
        ///     atom pairs found, which may be empty.
        /// </returns>
        public static List<AtomPair> FindInteractions(CancellationToken cancellationToken, decimal maxAtomInterationDistance /*= 8.0m*/, string proteinId, Dictionary<string, List<string>> pdbIdChainIdList, ProteinChainListContainer proteinFileChains, bool breakWhenFirstInteractionFound = false, int totalThreads = -1, bool sort = true, int requiredChains = -1)
        {
            // The interaction cache is switched off here; the load and save calls below only run if this is set to true.
            bool useCache = false;

            if (useCache && !string.IsNullOrWhiteSpace(proteinId))
            {
                var cachedInteractions = InteractionsCache.LoadPdbInteractionCache(proteinId, requiredChains);

                if (cachedInteractions != null)
                {
                    return cachedInteractions;
                }
            }

            // Check that the required number of chains was found.
            if (proteinFileChains == null || proteinFileChains.ChainList == null || (requiredChains > -1 && proteinFileChains.ChainList.Count != requiredChains))
            {
                return null;
            }

            // Check that all chains have atoms.
            if (proteinFileChains.ChainList.Any(chain => chain.AtomList == null || chain.AtomList.Count == 0))
            {
                return null;
            }

            // Make one list of 3D atom positions per chain.
            var positions = new List<Point3D>[proteinFileChains.ChainList.Count];

            for (int chainIndex = 0; chainIndex < proteinFileChains.ChainList.Count; chainIndex++)
            {
                positions[chainIndex] = Clustering.AtomRecordListToPoint3DList(proteinFileChains.ChainList[chainIndex]);
            }

            var tasks = new List<Task<List<AtomPair>>>();

            for (int chainIndexA = 0; chainIndexA < proteinFileChains.ChainList.Count; chainIndexA++)
            {
                // Start after chain A so that each unordered chain pair is visited exactly once.
                for (int chainIndexB = chainIndexA + 1; chainIndexB < proteinFileChains.ChainList.Count; chainIndexB++)
                {
                    WorkDivision<List<AtomPair>> workDivision = new WorkDivision<List<AtomPair>>(proteinFileChains.ChainList[chainIndexA].AtomList.Count, totalThreads);

                    // Stop flag shared by the tasks of this chain pair only. It is read and written with
                    // Volatile so that a write made by one task becomes visible to the others.
                    bool breakOut = false;

                    for (int threadIndex = 0; threadIndex < workDivision.ThreadCount; threadIndex++)
                    {
                        // Copy the loop variables so that each lambda captures its own values.
                        int localThreadIndex = threadIndex;
                        int localChainIndexA = chainIndexA;
                        int localChainIndexB = chainIndexB;
                        WorkDivision<List<AtomPair>> localWorkDivision = workDivision;

                        Task<List<AtomPair>> task = Task.Run(() =>
                        {
                            var taskResult = new List<AtomPair>();

                            var atomsA = proteinFileChains.ChainList[localChainIndexA].AtomList;
                            var atomsB = proteinFileChains.ChainList[localChainIndexB].AtomList;
                            var positionsA = positions[localChainIndexA];
                            var positionsB = positions[localChainIndexB];

                            // This task handles an inclusive range of chain A atom indexes.
                            for (int atomIndexA = localWorkDivision.ThreadFirstIndex[localThreadIndex]; atomIndexA <= localWorkDivision.ThreadLastIndex[localThreadIndex]; atomIndexA++)
                            {
                                if (Volatile.Read(ref breakOut))
                                {
                                    break;
                                }

                                for (int atomIndexB = 0; atomIndexB < atomsB.Count; atomIndexB++)
                                {
                                    if (Volatile.Read(ref breakOut) || (breakWhenFirstInteractionFound && taskResult.Count > 0))
                                    {
                                        // Tell the other tasks of this chain pair to stop as well.
                                        Volatile.Write(ref breakOut, true);

                                        break;
                                    }

                                    // Atoms whose coordinates could not be parsed cannot be measured.
                                    if ((!positionsA[atomIndexA].ParseOK) || (!positionsB[atomIndexB].ParseOK))
                                    {
                                        continue;
                                    }

                                    decimal atomicDistanceAngstroms3D = Point3D.Distance3D(positionsA[atomIndexA], positionsB[atomIndexB], true);

                                    // Skip pairs at zero (or negative) distance and pairs farther apart than the caller's cut-off.
                                    if (atomicDistanceAngstroms3D <= 0.0m || atomicDistanceAngstroms3D > maxAtomInterationDistance)
                                    {
                                        continue;
                                    }

                                    var atomPair = new AtomPair(
                                        proteinId,
                                        atomsA[atomIndexA],
                                        localChainIndexA,
                                        proteinId,
                                        localChainIndexB,
                                        atomsB[atomIndexB],
                                        atomicDistanceAngstroms3D);

                                    taskResult.Add(atomPair);
                                }
                            }

                            // A task that found nothing reports null; the merge step below skips null results.
                            if (taskResult.Count == 0)
                            {
                                return null;
                            }

                            return taskResult;
                        }, cancellationToken);

                        workDivision.TaskList.Add(task);
                    }

                    tasks.AddRange(workDivision.TaskList);
                }
            }

            try
            {
                Task[] tasksToWait = tasks.Where(task => task != null && !task.IsCompleted).ToArray<Task>();

                if (tasksToWait.Length > 0)
                {
                    Task.WaitAll(tasksToWait);
                }
            }
            catch (AggregateException)
            {
                // Deliberately ignored: cancelled and faulted tasks are excluded when the results are merged below.
                // NOTE(review): a faulted task therefore drops its share of the results silently - confirm this is intended.
            }

            // Merge the results of every task that ran to completion and found at least one pair.
            var atomPairList = new List<AtomPair>();

            foreach (var task in tasks.Where(t => t != null && t.IsCompleted && !t.IsCanceled && !t.IsFaulted && t.Result != null && t.Result.Count > 0))
            {
                atomPairList.AddRange(task.Result);
            }

            if (sort && atomPairList.Count > 1)
            {
                atomPairList = atomPairList
                               .OrderBy(i => ProteinDataBankFileOperations.NullableTryParseInt32(i.Atom1.resSeq.FieldValue))
                               .ThenBy(i => ProteinDataBankFileOperations.NullableTryParseInt32(i.Atom1.serial.FieldValue))
                               .ThenBy(j => ProteinDataBankFileOperations.NullableTryParseInt32(j.Atom2.resSeq.FieldValue))
                               .ThenBy(j => ProteinDataBankFileOperations.NullableTryParseInt32(j.Atom2.serial.FieldValue))
                               .ToList();
            }

            if (useCache)
            {
                InteractionsCache.SavePdbInteractionCache(proteinId, atomPairList, requiredChains);
            }

            return atomPairList;
        }
        /// <summary>
        ///     Builds a spreadsheet of motif profile records. The first row is the sheet header. Each record,
        ///     taken in descending order of its parsed <c>TotalFound</c> value, then adds an empty row, a record
        ///     header row, one row per profile position and a final "Average" row.
        /// </summary>
        /// <param name="motifProfileSpreadsheetRecordList">The records to write.</param>
        /// <returns>The rows converted to a 2D array by <c>ConvertTypes.StringJagged2DArrayTo2DArray</c>.</returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="motifProfileSpreadsheetRecordList" /> is null.</exception>
        public static string[,] Spreadsheet(List<MotifProfileSpreadsheetRecord> motifProfileSpreadsheetRecordList)
        {
            if (motifProfileSpreadsheetRecordList == null)
            {
                throw new ArgumentNullException(nameof(motifProfileSpreadsheetRecordList));
            }

            var result = new List<string[]>();

            var sheetHeader = new List<string>()
            {
                "Motif Name",
                "Motif Source",
                "Direction",
                "Total Found",
                //"Total Found In Heterodimers",
                //"Total Found In Homodimers",
                "Profile Position",
            };

            sheetHeader.AddRange(AminoAcidConversions.AminoAcidCodeArray1L());

            result.Add(sheetHeader.ToArray());

            // The column layout is fixed once the header is complete, so resolve it once instead of per cell.
            var columnCount = sheetHeader.Count;
            var profilePositionColumn = sheetHeader.IndexOf("Profile Position");
            var firstAminoAcidColumn = profilePositionColumn + 1;

            foreach (var record in motifProfileSpreadsheetRecordList.OrderByDescending(a => ProteinDataBankFileOperations.NullableTryParseInt32(a.TotalFound)))
            {
                // Empty row separating this record from the previous rows.
                result.Add(new string[] { });

                var recordHeader = new List<string>()
                {
                    record.MotifName,
                    record.MotifSource,
                    record.Direction,
                    record.TotalFound,
                    //record.TotalFoundInHeterodimers,
                    //record.TotalFoundInHomodimers,
                    "",
                };

                recordHeader.AddRange(AminoAcidConversions.AminoAcidCodeArray1L());

                result.Add(recordHeader.ToArray());

                // One row per profile position: the one-based position followed by that position's values.
                for (var positionIndex = 0; positionIndex < record.AminoAcidProfile.Length; positionIndex++)
                {
                    var row = new string[columnCount];

                    row[profilePositionColumn] = "" + (positionIndex + 1);

                    for (var aaIndex = 0; aaIndex < record.AminoAcidProfile[positionIndex].Length; aaIndex++)
                    {
                        row[firstAminoAcidColumn + aaIndex] = $"{record.AminoAcidProfile[positionIndex][aaIndex]:0.00}";
                    }

                    result.Add(row);
                }

                // Closing row holding the record's average profile.
                var rowAverage = new string[columnCount];

                rowAverage[profilePositionColumn] = "Average";

                for (var aaIndex = 0; aaIndex < record.AverageProfile.Length; aaIndex++)
                {
                    rowAverage[firstAminoAcidColumn + aaIndex] = $"{record.AverageProfile[aaIndex]:0.00}";
                }

                result.Add(rowAverage);
            }

            return ConvertTypes.StringJagged2DArrayTo2DArray(result.ToArray());
        }
        /// <summary>
        ///     Reads the records of a PDB file and returns the column-formatted lines of the records that are kept.
        ///     ATOM, HETATM and ANISOU records are kept when their chain and residue number fall inside one of
        ///     the given interfaces. LINK records are kept when both residue numbers fall inside the same
        ///     interface (the chain id is not compared). TER records are kept when their chain matches an
        ///     interface. None of these are kept once an ENDMDL record has been read. MODEL and ENDMDL records
        ///     are never kept. Every other record is kept until the first ENDMDL record, and afterwards only
        ///     while the MODEL/ENDMDL counter is zero.
        /// </summary>
        /// <param name="pdbFilename">Passed to the <c>ProteinDataBankFile</c> constructor.</param>
        /// <param name="proteinInterfaceIdList">Interfaces (chain id, first position, last position) whose records are kept.</param>
        /// <returns>The <c>ColumnFormatLine</c> of every kept record, in file order.</returns>
        public static List<string> RemoveNonProteinInterfaceRecords(string pdbFilename, List<ProteinInterfaceId> proteinInterfaceIdList)
        {
            var pdbFile = new ProteinDataBankFormat.ProteinDataBankFile(pdbFilename);
            var keptLines = new List<string>();

            var endModelSeen = false;
            var modelDepth = 0;

            // Shared test for record types that carry a chain id and a residue number.
            // It reads endModelSeen at call time, so it always reflects the records read so far.
            Func<string, int?, bool> residueIsInsideAnInterface = (chainLetter, residueNumber) =>
                !endModelSeen &&
                proteinInterfaceIdList.Any(id => SpreadsheetFileHandler.AlphabetLetterRollOver(id.ChainId) == chainLetter && residueNumber >= id.FirstPosition && residueNumber <= id.LastPosition);

            for (var recordNumber = 0; recordNumber < pdbFile.Count; recordNumber++)
            {
                var record = pdbFile.NextRecord();

                if (record == null)
                {
                    continue;
                }

                // Exact type comparison, as opposed to an "is" test that would also match derived record types.
                var recordType = record.GetType();

                if (recordType == typeof(MODEL_Record))
                {
                    modelDepth++;
                    continue;
                }

                if (recordType == typeof(ENDMDL_Record))
                {
                    endModelSeen = true;
                    modelDepth--;
                    continue;
                }

                bool keep;

                if (recordType == typeof(ATOM_Record))
                {
                    var atom = (ATOM_Record)record;
                    var residueNumber = ProteinDataBankFileOperations.NullableTryParseInt32(atom.resSeq.FieldValue);

                    // Records whose residue number does not parse are dropped.
                    keep = residueNumber != null && residueIsInsideAnInterface(atom.chainID.FieldValue, residueNumber);
                }
                else if (recordType == typeof(HETATM_Record))
                {
                    var hetatm = (HETATM_Record)record;
                    var residueNumber = ProteinDataBankFileOperations.NullableTryParseInt32(hetatm.resSeq.FieldValue);

                    keep = residueNumber != null && residueIsInsideAnInterface(hetatm.chainID.FieldValue, residueNumber);
                }
                else if (recordType == typeof(ANISOU_Record))
                {
                    var anisou = (ANISOU_Record)record;
                    var residueNumber = ProteinDataBankFileOperations.NullableTryParseInt32(anisou.resSeq.FieldValue);

                    keep = residueNumber != null && residueIsInsideAnInterface(anisou.chainID.FieldValue, residueNumber);
                }
                else if (recordType == typeof(LINK_Record))
                {
                    var link = (LINK_Record)record;
                    var firstResidue = ProteinDataBankFileOperations.NullableTryParseInt32(link.resSeq1.FieldValue);
                    var secondResidue = ProteinDataBankFileOperations.NullableTryParseInt32(link.resSeq2.FieldValue);

                    // Both residue numbers must lie within the range of one and the same interface.
                    keep = firstResidue != null &&
                           secondResidue != null &&
                           !endModelSeen &&
                           proteinInterfaceIdList.Any(id => firstResidue >= id.FirstPosition && firstResidue <= id.LastPosition && secondResidue >= id.FirstPosition && secondResidue <= id.LastPosition);
                }
                else if (recordType == typeof(TER_Record))
                {
                    var ter = (TER_Record)record;
                    var terChain = ter.chainID.FieldValue;

                    keep = !endModelSeen && proteinInterfaceIdList.Any(id => SpreadsheetFileHandler.AlphabetLetterRollOver(id.ChainId) == terChain);
                }
                else
                {
                    // Any other record type.
                    keep = !endModelSeen || modelDepth == 0;
                }

                if (keep)
                {
                    keptLines.Add(record.ColumnFormatLine);
                }
            }

            return keptLines;
        }
Exemplo n.º 18
0
        /// <summary>
        ///     Loads the sequences from the given sequence files, clusters every PDB file found in the given
        ///     directories whose PDB id appears in those sequences, and builds two interface vectors (named
        ///     forward and reverse in this code) for every protein interface reported by the clustering.
        /// </summary>
        /// <param name="cancellationToken">Forwarded to <c>Clustering.ClusterProteinDataBankFile</c>.</param>
        /// <param name="maxAtomInterationDistance">Forwarded to <c>Clustering.ClusterProteinDataBankFile</c>.</param>
        /// <param name="minimumProteinInterfaceDensity">Forwarded to <c>Clustering.ClusterProteinDataBankFile</c>.</param>
        /// <param name="sequenceListFileArray">Sequence files to load; must not be null.</param>
        /// <param name="pdbFileDirectoryLocationArray">Locations searched for PDB files; must not be null.</param>
        /// <param name="progressActionSet">Receives the start, per-file progress, time-remaining and finish notifications.</param>
        /// <returns>All vectors, ordered by protein id, then chain id, then protein interface id.</returns>
        /// <exception cref="ArgumentNullException">Thrown when either array argument is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException">Thrown when no PDB ids could be obtained from the loaded sequences.</exception>
        public static List<VectorProteinInterfaceWhole> LoadProteinInterfaceVectorFromFiles(
            CancellationToken cancellationToken,
            decimal maxAtomInterationDistance,
            decimal minimumProteinInterfaceDensity,
            string[] sequenceListFileArray,
            string[] pdbFileDirectoryLocationArray,
            ProgressActionSet progressActionSet)
        {
            if (sequenceListFileArray == null)
            {
                throw new ArgumentNullException(nameof(sequenceListFileArray));
            }

            if (pdbFileDirectoryLocationArray == null)
            {
                throw new ArgumentNullException(nameof(pdbFileDirectoryLocationArray));
            }

            var vectors = new List<VectorProteinInterfaceWhole>();

            // 1: Load the sequences.
            List<ISequence> sequences = SequenceFileHandler.LoadSequenceFileList(sequenceListFileArray, StaticValues.MolNameProteinAcceptedValues);

            var pdbIdChainIdList = ProteinDataBankFileOperations.PdbIdChainIdList(sequences);

            // 2: Collect the PDB ids of the loaded sequences.
            List<string> knownPdbIds = FilterProteins.SequenceListToPdbIdList(sequences);

            if (knownPdbIds == null || knownPdbIds.Count == 0)
            {
                throw new ArgumentOutOfRangeException(nameof(sequenceListFileArray), "Error loading PDB ID list");
            }

            // 3: List the PDB files found in the given locations.
            string[] pdbFiles = ProteinDataBankFileOperations.GetPdbFilesArray(pdbFileDirectoryLocationArray);

            ProgressActionSet.StartAction(pdbFiles.Length, progressActionSet);

            var startTicks = DateTime.Now.Ticks;

            // 4: Process each PDB file in turn.
            for (int fileIndex = 0; fileIndex < pdbFiles.Length; fileIndex++)
            {
                // Progress is reported for every file, including those skipped below.
                ProgressActionSet.ProgressAction(1, progressActionSet);
                ProgressActionSet.EstimatedTimeRemainingAction(startTicks, fileIndex + 1, pdbFiles.Length, progressActionSet);

                string pdbFilename = pdbFiles[fileIndex];
                string proteinId = ProteinDataBankFileOperations.PdbIdFromPdbFilename(pdbFilename);

                // Skip files whose PDB id was not among the loaded sequences.
                if (!knownPdbIds.Contains(proteinId))
                {
                    continue;
                }

                ClusterProteinDataBankFileResult clusterResult = Clustering.ClusterProteinDataBankFile(
                    cancellationToken,
                    maxAtomInterationDistance,
                    minimumProteinInterfaceDensity,
                    pdbFilename,
                    pdbIdChainIdList,
                    ClusteringMethodOptions.ClusterWithResidueSequenceIndex,
                    -1,
                    -1,
                    progressActionSet);

                if (clusterResult == null)
                {
                    continue;
                }

                List<ProteinInterfaceSequenceAndPositionData> orderedInterfaces = clusterResult.ProteinInterfaceAnalysisResultData.ProteinInterfacesSequenceAndPositionDataList
                    .OrderBy(a => a.FullProteinInterfaceId.ProteinId)
                    .ThenBy(a => a.FullProteinInterfaceId.ChainId)
                    .ThenBy(a => a.FullProteinInterfaceId.ProteinInterfaceId)
                    .ToList();

                foreach (ProteinInterfaceSequenceAndPositionData interfaceData in orderedInterfaces)
                {
                    // Find the loaded sequence with the same PDB id and chain letter (compared in upper case).
                    var matchingSequence = sequences.FirstOrDefault(candidate =>
                    {
                        var ids = SequenceIdSplit.SequenceIdToPdbIdAndChainId(candidate.ID);

                        return ids.PdbId.ToUpperInvariant() == interfaceData.FullProteinInterfaceId.ProteinId.ToUpperInvariant()
                               && ids.ChainId.ToUpperInvariant() == interfaceData.ChainIdLetter.ToUpperInvariant();
                    });

                    // -1 marks an interface for which no sequence was found.
                    var fullSequenceLength = matchingSequence == null ? -1 : matchingSequence.Count;

                    // Build the forward vector (third argument false) first, then the reverse one (third argument true).
                    foreach (bool reverse in new[] { false, true })
                    {
                        var vector = MakeVectorProteinInterfaceWhole(pdbFilename, interfaceData, reverse, false);
                        vector.FullSequenceLength = fullSequenceLength;

                        vectors.Add(vector);
                    }
                }
            }

            ProgressActionSet.FinishAction(true, progressActionSet);

            return vectors
                .OrderBy(a => a.FullProteinInterfaceId.ProteinId)
                .ThenBy(a => a.FullProteinInterfaceId.ChainId)
                .ThenBy(a => a.FullProteinInterfaceId.ProteinInterfaceId)
                .ToList();
        }