Пример #1
0
        public static int PdbModelCount(string pdbFilename, int maximumToFind = -1)
        {
            if (!File.Exists(pdbFilename))
            {
                throw new FileNotFoundException("File not found", pdbFilename);
            }

            // Load pdb/protein file, excluding all records but ATOM, HETATM and TER.
            var proteinDataBankFile = new ProteinDataBankFile(pdbFilename, new[]
            {
                MODEL_Record.MODEL_Field.FieldName
                //ProteinDataBankFile.ENDMDL_Record.ENDMDL_Field.FieldName,
            });

            int modelCount = 0;

            //var endModelCount = 0;

            for (int proteinDataBankFileRecordIndex = 0; proteinDataBankFileRecordIndex < proteinDataBankFile.Count; proteinDataBankFileRecordIndex++)
            {
                ProteinDataBankFileRecord currentRecord = proteinDataBankFile.NextRecord();

                if (currentRecord == null)
                {
                    continue;
                }

                if (currentRecord.GetType() == typeof(MODEL_Record))
                {
                    var model = (MODEL_Record)currentRecord;
                    modelCount++;

                    if (maximumToFind > -1 && modelCount > maximumToFind)
                    {
                        break;
                    }
                }
                //else if (currentRecord.GetType() == typeof(ProteinDataBankFile.ENDMDL_Record))
                //{
                //    var endModel = (ProteinDataBankFile.ENDMDL_Record)currentRecord;
                //    endModelCount++;
                //}
            }

            return(modelCount);
        }
Пример #2
0
        /// <summary>
        ///     This method loads 1 pdb file and returns the atoms contained in the different chains.
        /// </summary>
        /// <param name="pdbFilename"></param>
        /// <param name="chainIdWhiteList"></param>
        /// <param name="minimumChains"></param>
        /// <param name="maximumChains"></param>
        /// <returns></returns>
        public static ProteinChainListContainer PdbAtomicChains(string pdbFilename, string[] chainIdWhiteList, int minimumChains = 2, int maximumChains = 2, bool onlyCarbonAlphas = false)
        {
            ////////Console.WriteLine(pdbFilename);
            // Check file exists.
            if (!File.Exists(pdbFilename))
            {
                //return null;
                throw new FileNotFoundException("File not found", pdbFilename);
            }

            // Check min chains not more than max chains.
            if (minimumChains > maximumChains)
            {
                throw new ArgumentOutOfRangeException(nameof(minimumChains));
            }

            // Load pdb/protein file, excluding all records but ATOM, HETATM and TER.
            var proteinDataBankFile = new ProteinDataBankFile(pdbFilename, new[]
            {
                ATOM_Record.ATOM_Field.FieldName,
                HETATM_Record.HETATM_Field.FieldName,
                TER_Record.TER_Field.FieldName,
                MODEL_Record.MODEL_Field.FieldName,
                ENDMDL_Record.ENDMDL_Field.FieldName
            });


            // Make new array for atom chain.
            //List<ATOM_Record>[] proteinFileChains = new List<ATOM_Record>[maximumChains];
            var pdbFileChains = new ProteinChainListContainer();

            //var fileError = false;
            //var chainCount = 0;
            // Loop through all the previously loaded protein file records to make lists of atoms in each chain.
            // Also make a list of residue numbers (which will be sorted later just in case it is out of order).

            var atomRecordListDictionary    = new Dictionary <string, List <ProteinDataBankFileRecord> >();
            var hetAtomRecordListDictionary = new Dictionary <string, List <ProteinDataBankFileRecord> >();
            int terCount = 0;

            for (int proteinDataBankFileRecordIndex = 0; proteinDataBankFileRecordIndex < proteinDataBankFile.Count; proteinDataBankFileRecordIndex++)
            {
                ProteinDataBankFileRecord currentRecord = proteinDataBankFile.NextRecord();

                if (currentRecord == null)
                {
                    continue;
                }

                if (currentRecord.GetType() == typeof(ATOM_Record))
                {
                    var atom = (ATOM_Record)currentRecord;

                    if (onlyCarbonAlphas && atom.name.FieldValue.Trim().ToUpperInvariant() != StaticValues.CarbonAlpha)
                    {
                        continue;
                    }

                    string chainIdKey = atom.chainID.FieldValue.Trim().ToUpperInvariant();

                    if (chainIdWhiteList != null && !chainIdWhiteList.Contains(chainIdKey))
                    {
                        continue;
                    }

                    if (!atomRecordListDictionary.ContainsKey(chainIdKey))
                    {
                        atomRecordListDictionary.Add(chainIdKey, new List <ProteinDataBankFileRecord>());
                    }

                    if (ParameterValidation.IsAminoAcidCodeValid(atom.resName.FieldValue))
                    {
                        atomRecordListDictionary[chainIdKey].Add(atom);
                    }
                }
                else if (currentRecord.GetType() == typeof(HETATM_Record))
                {
                    var hetatm = (HETATM_Record)currentRecord;

                    if (onlyCarbonAlphas && hetatm.name.FieldValue.Trim().ToUpperInvariant() != StaticValues.CarbonAlpha)
                    {
                        continue;
                    }

                    string chainIdKey = hetatm.chainID.FieldValue.Trim().ToUpperInvariant();

                    if (chainIdWhiteList != null && !chainIdWhiteList.Contains(chainIdKey))
                    {
                        continue;
                    }

                    if (!hetAtomRecordListDictionary.ContainsKey(chainIdKey))
                    {
                        hetAtomRecordListDictionary.Add(chainIdKey, new List <ProteinDataBankFileRecord>());
                    }

                    //if (!ParameterValidation.IsAminoAcidCodeValid(hetatm.resName.FieldValue))
                    //{
                    //    ////////Console.WriteLine(hetatm.columnFormatLine);
                    //    hetatm.resName.FieldValue = UnspecifiedOrUnknownAminoAcid.Code3L;
                    //    hetatm.columnFormatLine = hetatm.columnFormatLine.Remove(ProteinDataBankFile.HETATM_Record.resName_Field.FirstColumn - 1, (ProteinDataBankFile.HETATM_Record.resName_Field.LastColumn - ProteinDataBankFile.HETATM_Record.resName_Field.FirstColumn) + 1);
                    //    hetatm.columnFormatLine = hetatm.columnFormatLine.Insert(ProteinDataBankFile.HETATM_Record.resName_Field.FirstColumn - 1, UnspecifiedOrUnknownAminoAcid.Code3L);
                    //    ////////Console.WriteLine(hetatm.columnFormatLine);
                    //}

                    if (ParameterValidation.IsAminoAcidCodeValid(hetatm.resName.FieldValue))
                    {
                        hetAtomRecordListDictionary[chainIdKey].Add(hetatm);
                    }
                }
                else if (currentRecord.GetType() == typeof(TER_Record))
                {
                    var ter = (TER_Record)currentRecord;

                    string chainIdKey = ter.chainID.FieldValue.Trim().ToUpperInvariant();

                    if (chainIdWhiteList != null && !chainIdWhiteList.Contains(chainIdKey))
                    {
                        continue;
                    }

                    terCount++;

                    if (terCount >= maximumChains)
                    {
                        break;
                        //return null;
                    }
                }
                else if (currentRecord.GetType() == typeof(ENDMDL_Record))
                {
                    break;
                }
            }

            // file has been parsed so clear used file data from memory as soon as possible
            proteinDataBankFile.UnloadFile();

            int totalChains = atomRecordListDictionary.Count > hetAtomRecordListDictionary.Count ? atomRecordListDictionary.Count : hetAtomRecordListDictionary.Count;

            for (int chainIndex = 0; chainIndex < totalChains; chainIndex++)
            {
                pdbFileChains.ChainList.Add(new ProteinAtomListContainer());
            }

            atomRecordListDictionary = atomRecordListDictionary.OrderBy(a => a.Key).ToDictionary(a => a.Key, a => a.Value);

            int chainIndex2 = -1;

            foreach (var atomRecordListKvp in atomRecordListDictionary)
            {
                chainIndex2++;

                string chainName = atomRecordListKvp.Key;
                List <ProteinDataBankFileRecord> chainRecords = atomRecordListKvp.Value;

                if (chainRecords == null || chainRecords.Count == 0)
                {
                    continue;
                }

                chainRecords = chainRecords.OrderBy(a => NullableTryParseInt32(((ATOM_Record)a).serial.FieldValue)).ToList();

                pdbFileChains.ChainList[chainIndex2].AtomList = chainRecords.Select(a => (ATOM_Record)a).ToList();
            }

            hetAtomRecordListDictionary = hetAtomRecordListDictionary.OrderBy(a => a.Key).ToDictionary(a => a.Key, a => a.Value);

            int chainIndex3 = -1;

            foreach (var hetAtomRecordListKvp in hetAtomRecordListDictionary)
            {
                chainIndex3++;
                string chainName = hetAtomRecordListKvp.Key;
                List <ProteinDataBankFileRecord> chainRecords = hetAtomRecordListKvp.Value;

                if (chainRecords == null || chainRecords.Count == 0)
                {
                    continue;
                }

                chainRecords = chainRecords.OrderBy(a => NullableTryParseInt32(((HETATM_Record)a).serial.FieldValue)).ToList();

                foreach (ProteinDataBankFileRecord proteinDataBankFileRecord in chainRecords)
                {
                    var chainRecord = (HETATM_Record)proteinDataBankFileRecord;

                    string residueSequenceToFind = chainRecord.resSeq.FieldValue;
                    string atomChainId           = chainRecord.chainID.FieldValue.Trim().ToUpperInvariant();

                    if (!atomRecordListDictionary.ContainsKey(atomChainId) || atomRecordListDictionary[atomChainId].Count(a => ((ATOM_Record)a).resSeq.FieldValue == residueSequenceToFind) == 0)
                    {
                        ATOM_Record atom = ConvertHetatmRecordToAtomRecord(chainRecord);

                        pdbFileChains.ChainList[chainIndex3].AtomList.Add(atom);
                    }
                }
            }

            int nonEmptyChainCount = pdbFileChains.ChainList.Count(a => a != null && a.AtomList != null && a.AtomList.Count > 0);

            if (nonEmptyChainCount >= minimumChains && nonEmptyChainCount <= maximumChains)
            {
                return(pdbFileChains);
            }

            ////////Console.WriteLine("Too many chains (" + nonEmptyChainCount + "): " + pdbFilename);
            return(null);
        }
Пример #3
0
        public static int PdbAtomicChainsCount(string pdbFilename, string[] chainIdWhiteList = null, int maximumToFind = -1)
        {
            if (!File.Exists(pdbFilename))
            {
                throw new FileNotFoundException("File not found", pdbFilename);
            }

            // Load pdb/protein file, excluding all records but ATOM, HETATM and TER.
            var proteinDataBankFile = new ProteinDataBankFile(pdbFilename, new[]
            {
                ATOM_Record.ATOM_Field.FieldName,
                HETATM_Record.HETATM_Field.FieldName,
                //TER_Record.TER_Field.FieldName
                MODEL_Record.MODEL_Field.FieldName,
                ENDMDL_Record.ENDMDL_Field.FieldName
            });


            int atomCount    = 0;
            int hetAtomCount = 0;
            var terCount     = 0;

            var chainNames = new List <string>();

            for (int proteinDataBankFileRecordIndex = 0; proteinDataBankFileRecordIndex < proteinDataBankFile.Count; proteinDataBankFileRecordIndex++)
            {
                ProteinDataBankFileRecord currentRecord = proteinDataBankFile.NextRecord();

                if (currentRecord == null)
                {
                    continue;
                }

                if (currentRecord.GetType() == typeof(ATOM_Record))
                {
                    var atom = (ATOM_Record)currentRecord;

                    string chainIdKey = atom.chainID.FieldValue.Trim().ToUpperInvariant();

                    if (chainIdWhiteList != null && !chainIdWhiteList.Contains(chainIdKey))
                    {
                        continue;
                    }

                    atomCount++;

                    if (!string.IsNullOrWhiteSpace(atom.chainID.FieldValue) && !chainNames.Contains(atom.chainID.FieldValue))
                    {
                        chainNames.Add(atom.chainID.FieldValue);

                        if (maximumToFind > -1 && chainNames.Count > maximumToFind)
                        {
                            break;
                        }
                    }
                }
                else if (currentRecord.GetType() == typeof(HETATM_Record))
                {
                    var hetatm = (HETATM_Record)currentRecord;

                    string chainIdKey = hetatm.chainID.FieldValue.Trim().ToUpperInvariant();

                    if (chainIdWhiteList != null && !chainIdWhiteList.Contains(chainIdKey))
                    {
                        continue;
                    }

                    hetAtomCount++;

                    if (!string.IsNullOrWhiteSpace(hetatm.chainID.FieldValue) && !chainNames.Contains(hetatm.chainID.FieldValue))
                    {
                        chainNames.Add(hetatm.chainID.FieldValue);

                        if (maximumToFind > -1 && chainNames.Count > maximumToFind)
                        {
                            break;
                        }
                    }
                }
                else if (currentRecord.GetType() == typeof(ENDMDL_Record))
                {
                    break;
                }
                else if (currentRecord.GetType() == typeof(TER_Record))
                {
                    var ter = (TER_Record)currentRecord;

                    string chainIdKey = ter.chainID.FieldValue.Trim().ToUpperInvariant();

                    if (chainIdWhiteList != null && !chainIdWhiteList.Contains(chainIdKey))
                    {
                        continue;
                    }

                    terCount++;

                    if (!string.IsNullOrWhiteSpace(ter.chainID.FieldValue) && !chainNames.Contains(ter.chainID.FieldValue))
                    {
                        chainNames.Add(ter.chainID.FieldValue);
                    }
                }
            }

            int chainNamesCount = chainNames.Distinct().Count();

            //var chainCount = chainNamesCount > terCount ? chainNamesCount : terCount;

            return(chainNamesCount);
        }
Пример #4
0
        public static List <string> PdbAtomAcidList(string pdbFilename, string[] chainIdWhiteList = null, bool onlyCarbonAlphas = true, bool distinct = true)
        {
            if (!File.Exists(pdbFilename))
            {
                throw new FileNotFoundException("File not found", pdbFilename);
            }

            // Load pdb/protein file, excluding all records but ATOM, HETATM and TER.
            var proteinDataBankFile = new ProteinDataBankFile(pdbFilename, new[]
            {
                ATOM_Record.ATOM_Field.FieldName,
                HETATM_Record.HETATM_Field.FieldName,
                //TER_Record.TER_Field.FieldName
            });

            var atomAcidList = new List <string>();

            for (int proteinDataBankFileRecordIndex = 0; proteinDataBankFileRecordIndex < proteinDataBankFile.Count; proteinDataBankFileRecordIndex++)
            {
                ProteinDataBankFileRecord currentRecord = proteinDataBankFile.NextRecord();

                if (currentRecord == null)
                {
                    continue;
                }

                if (currentRecord.GetType() == typeof(ATOM_Record))
                {
                    var atom = (ATOM_Record)currentRecord;

                    string chainIdKey = atom.chainID.FieldValue.Trim().ToUpperInvariant();

                    if (chainIdWhiteList != null && !chainIdWhiteList.Contains(chainIdKey))
                    {
                        continue;
                    }

                    if (onlyCarbonAlphas && atom.name.FieldValue.Trim().ToUpperInvariant() != StaticValues.CarbonAlpha)
                    {
                        continue;
                    }

                    if (!distinct || !atomAcidList.Contains(atom.resName.FieldValue))
                    {
                        atomAcidList.Add(atom.resName.FieldValue);
                    }
                }
                else if (currentRecord.GetType() == typeof(HETATM_Record))
                {
                    var hetatm = (HETATM_Record)currentRecord;

                    string chainIdKey = hetatm.chainID.FieldValue.Trim().ToUpperInvariant();

                    if (chainIdWhiteList != null && !chainIdWhiteList.Contains(chainIdKey))
                    {
                        continue;
                    }

                    if (onlyCarbonAlphas && hetatm.name.FieldValue.Trim().ToUpperInvariant() != StaticValues.CarbonAlpha)
                    {
                        continue;
                    }

                    if (!distinct || !atomAcidList.Contains(hetatm.resName.FieldValue))
                    {
                        atomAcidList.Add(hetatm.resName.FieldValue);
                    }
                }
                //else if (currentRecord.GetType() == typeof (HETATM_Record))
                //{
                //    var ter = (HETATM_Record)currentRecord;

                //    string chainIdKey = ter.chainID.FieldValue.Trim().ToUpperInvariant();

                //    if (chainIdWhiteList != null && !chainIdWhiteList.Contains(chainIdKey))
                //    {
                //        continue;
                //    }
                //}
            }


            return(atomAcidList);
        }