Beispiel #1
0
        /// <summary>
        ///     Increments the count of the amino acid identified by a character code.
        /// </summary>
        /// <param name="aminoAcidCode">Character code of the amino acid; must be accepted by <c>ParameterValidation.IsAminoAcidCodeValid</c>.</param>
        /// <param name="incrementValue">Amount to add to the count (defaults to 1).</param>
        /// <exception cref="ArgumentOutOfRangeException">Thrown when <paramref name="aminoAcidCode" /> is not a valid amino acid code.</exception>
        public void IncrementAminoAcidCount(char aminoAcidCode, decimal incrementValue = 1)
        {
            if (ParameterValidation.IsAminoAcidCodeValid(aminoAcidCode))
            {
                // Translate the character code and hand over to the overload that accepts the converted value.
                // 'var' is deliberate: the overload chosen depends on the converter's actual return type.
                var aminoAcidNumber = AminoAcidConversions.AminoAcidNameToNumber(aminoAcidCode);

                IncrementAminoAcidCount(aminoAcidNumber, incrementValue);
                return;
            }

            throw new ArgumentOutOfRangeException(nameof(aminoAcidCode));
        }
Beispiel #2
0
        /// <summary>
        ///     Extracts the four-character PDB ID from the filename of a PDB file.
        /// </summary>
        /// <remarks>
        ///     The directory and extension are removed and the remainder is trimmed. If more than four characters are left,
        ///     the first three are dropped (presumably a "pdb" prefix, as in "pdb1abc.ent" - TODO confirm with callers).
        /// </remarks>
        /// <param name="pdbFilename">Filename or path of the PDB file.</param>
        /// <returns>The PDB ID converted to upper case with the invariant culture.</returns>
        /// <exception cref="ArgumentOutOfRangeException">Thrown when <paramref name="pdbFilename" /> is null, empty or white space.</exception>
        /// <exception cref="ArgumentException">Thrown when the text left after stripping is not exactly four characters long.</exception>
        public static string PdbIdFromPdbFilename(string pdbFilename)
        {
            const int idLength     = 4;
            const int prefixLength = 3;

            if (string.IsNullOrWhiteSpace(pdbFilename))
            {
                string reason = "parameter was " + ParameterValidation.NullEmptyOrWhiteSpaceToString(pdbFilename);

                throw new ArgumentOutOfRangeException(nameof(pdbFilename), pdbFilename, reason);
            }

            string baseName = Path.GetFileNameWithoutExtension(pdbFilename).Trim();

            // Names longer than an ID lose their leading three characters; shorter or equal names are used as they are.
            string candidateId = baseName.Length > idLength ? baseName.Substring(prefixLength) : baseName;

            if (candidateId.Length == idLength)
            {
                return candidateId.ToUpperInvariant();
            }

            throw new ArgumentException("PDB ID could not be extracted from parameter pdbFilename", nameof(pdbFilename));
        }
Beispiel #3
0
        /// <summary>
        ///     Loads one PDB file and returns the atoms it contains, grouped by chain.
        /// </summary>
        /// <remarks>
        ///     Only ATOM, HETATM, TER, MODEL and ENDMDL records are requested from the file. Reading stops at the first
        ///     ENDMDL record, or as soon as <paramref name="maximumChains" /> accepted TER records have been seen.
        ///     One container is created per chain ID that occurred in an accepted ATOM or HETATM record, ordered by chain ID.
        ///     Within a chain the ATOM records are ordered by serial number; HETATM records (also ordered by serial number)
        ///     are converted to ATOM records and appended only when the chain has no ATOM record with the same residue
        ///     sequence number.
        /// </remarks>
        /// <param name="pdbFilename">Path of the PDB file to load.</param>
        /// <param name="chainIdWhiteList">
        ///     Chain IDs to keep, or <c>null</c> to keep every chain. Chain IDs read from the file are trimmed and
        ///     upper-cased before the comparison, so entries are presumably expected in upper case (TODO confirm).
        /// </param>
        /// <param name="minimumChains">Smallest number of non-empty chains for which a result is returned.</param>
        /// <param name="maximumChains">Largest number of non-empty chains for which a result is returned.</param>
        /// <param name="onlyCarbonAlphas">When <c>true</c>, only records whose atom name equals <c>StaticValues.CarbonAlpha</c> are kept.</param>
        /// <returns>
        ///     The chains that were found, or <c>null</c> when the number of non-empty chains lies outside
        ///     [<paramref name="minimumChains" />, <paramref name="maximumChains" />].
        /// </returns>
        /// <exception cref="FileNotFoundException">Thrown when <paramref name="pdbFilename" /> does not exist.</exception>
        /// <exception cref="ArgumentOutOfRangeException">Thrown when <paramref name="minimumChains" /> is greater than <paramref name="maximumChains" />.</exception>
        public static ProteinChainListContainer PdbAtomicChains(string pdbFilename, string[] chainIdWhiteList, int minimumChains = 2, int maximumChains = 2, bool onlyCarbonAlphas = false)
        {
            if (!File.Exists(pdbFilename))
            {
                throw new FileNotFoundException("File not found", pdbFilename);
            }

            if (minimumChains > maximumChains)
            {
                throw new ArgumentOutOfRangeException(nameof(minimumChains));
            }

            // Load the file, keeping only the record types this method looks at.
            var proteinDataBankFile = new ProteinDataBankFile(pdbFilename, new[]
            {
                ATOM_Record.ATOM_Field.FieldName,
                HETATM_Record.HETATM_Field.FieldName,
                TER_Record.TER_Field.FieldName,
                MODEL_Record.MODEL_Field.FieldName,
                ENDMDL_Record.ENDMDL_Field.FieldName
            });

            var pdbFileChains = new ProteinChainListContainer();

            // Accepted records grouped by (trimmed, upper-cased) chain ID.
            var atomRecordListDictionary    = new Dictionary<string, List<ATOM_Record>>();
            var hetAtomRecordListDictionary = new Dictionary<string, List<HETATM_Record>>();
            int terCount = 0;

            for (int proteinDataBankFileRecordIndex = 0; proteinDataBankFileRecordIndex < proteinDataBankFile.Count; proteinDataBankFileRecordIndex++)
            {
                ProteinDataBankFileRecord currentRecord = proteinDataBankFile.NextRecord();

                if (currentRecord == null)
                {
                    continue;
                }

                if (currentRecord.GetType() == typeof(ATOM_Record))
                {
                    var atom = (ATOM_Record)currentRecord;

                    if (onlyCarbonAlphas && atom.name.FieldValue.Trim().ToUpperInvariant() != StaticValues.CarbonAlpha)
                    {
                        continue;
                    }

                    string chainIdKey = atom.chainID.FieldValue.Trim().ToUpperInvariant();

                    if (chainIdWhiteList != null && !chainIdWhiteList.Contains(chainIdKey))
                    {
                        continue;
                    }

                    // The chain is registered before the residue check, so a chain whose residues are all
                    // rejected still yields an (empty) container in the result.
                    List<ATOM_Record> chainAtoms;

                    if (!atomRecordListDictionary.TryGetValue(chainIdKey, out chainAtoms))
                    {
                        chainAtoms = new List<ATOM_Record>();
                        atomRecordListDictionary.Add(chainIdKey, chainAtoms);
                    }

                    if (ParameterValidation.IsAminoAcidCodeValid(atom.resName.FieldValue))
                    {
                        chainAtoms.Add(atom);
                    }
                }
                else if (currentRecord.GetType() == typeof(HETATM_Record))
                {
                    var hetatm = (HETATM_Record)currentRecord;

                    if (onlyCarbonAlphas && hetatm.name.FieldValue.Trim().ToUpperInvariant() != StaticValues.CarbonAlpha)
                    {
                        continue;
                    }

                    string chainIdKey = hetatm.chainID.FieldValue.Trim().ToUpperInvariant();

                    if (chainIdWhiteList != null && !chainIdWhiteList.Contains(chainIdKey))
                    {
                        continue;
                    }

                    // Same registration rule as for ATOM records: the key exists even if no record is kept.
                    List<HETATM_Record> chainHetAtoms;

                    if (!hetAtomRecordListDictionary.TryGetValue(chainIdKey, out chainHetAtoms))
                    {
                        chainHetAtoms = new List<HETATM_Record>();
                        hetAtomRecordListDictionary.Add(chainIdKey, chainHetAtoms);
                    }

                    if (ParameterValidation.IsAminoAcidCodeValid(hetatm.resName.FieldValue))
                    {
                        chainHetAtoms.Add(hetatm);
                    }
                }
                else if (currentRecord.GetType() == typeof(TER_Record))
                {
                    var ter = (TER_Record)currentRecord;

                    string chainIdKey = ter.chainID.FieldValue.Trim().ToUpperInvariant();

                    if (chainIdWhiteList != null && !chainIdWhiteList.Contains(chainIdKey))
                    {
                        continue;
                    }

                    terCount++;

                    // Enough chain terminators have been seen; the rest of the file is not read.
                    if (terCount >= maximumChains)
                    {
                        break;
                    }
                }
                else if (currentRecord.GetType() == typeof(ENDMDL_Record))
                {
                    // Stop at the first ENDMDL record.
                    break;
                }
            }

            // The file has been parsed, so release its data as soon as possible.
            proteinDataBankFile.UnloadFile();

            // One shared, sorted list of chain IDs keeps the ATOM and HETATM records of a chain in the same container
            // even when a chain ID occurs in only one of the two dictionaries. Iterating the sorted list directly also
            // avoids depending on the enumeration order of Dictionary, which is not defined.
            List<string> chainIds = atomRecordListDictionary.Keys
                                                            .Union(hetAtomRecordListDictionary.Keys)
                                                            .OrderBy(id => id)
                                                            .ToList();

            foreach (string chainId in chainIds)
            {
                var chain = new ProteinAtomListContainer();

                // Residue sequence numbers covered by ATOM records of this chain. Only ATOM records count here;
                // converted HETATM records are intentionally not added to the set.
                var atomResidueNumbers = new HashSet<string>();

                List<ATOM_Record> atomRecords;

                if (atomRecordListDictionary.TryGetValue(chainId, out atomRecords) && atomRecords.Count > 0)
                {
                    chain.AtomList = atomRecords.OrderBy(a => NullableTryParseInt32(a.serial.FieldValue)).ToList();

                    foreach (ATOM_Record atomRecord in atomRecords)
                    {
                        atomResidueNumbers.Add(atomRecord.resSeq.FieldValue);
                    }
                }

                List<HETATM_Record> hetAtomRecords;

                if (hetAtomRecordListDictionary.TryGetValue(chainId, out hetAtomRecords))
                {
                    foreach (HETATM_Record hetAtomRecord in hetAtomRecords.OrderBy(a => NullableTryParseInt32(a.serial.FieldValue)))
                    {
                        // A residue already described by ATOM records is not duplicated from HETATM records.
                        if (atomResidueNumbers.Contains(hetAtomRecord.resSeq.FieldValue))
                        {
                            continue;
                        }

                        // NOTE(review): whether a new ProteinAtomListContainer starts with a non-null AtomList is not
                        // visible here, so the list is created on demand for chains that have no ATOM records.
                        if (chain.AtomList == null)
                        {
                            chain.AtomList = new List<ATOM_Record>();
                        }

                        chain.AtomList.Add(ConvertHetatmRecordToAtomRecord(hetAtomRecord));
                    }
                }

                pdbFileChains.ChainList.Add(chain);
            }

            int nonEmptyChainCount = pdbFileChains.ChainList.Count(a => a != null && a.AtomList != null && a.AtomList.Count > 0);

            if (nonEmptyChainCount >= minimumChains && nonEmptyChainCount <= maximumChains)
            {
                return pdbFileChains;
            }

            // Too few or too many chains: callers receive null rather than an exception.
            return null;
        }