/// <summary>
        ///     Reads a PDB file from disk, replacing whatever was loaded before.
        ///     When target record types are given, lines of any other record type are not turned into record instances.
        /// </summary>
        /// <param name="filename">Path of the PDB file to read.</param>
        /// <param name="targetRecordTypes">Record type names to keep; null applies no filter.</param>
        /// <param name="parseAll">
        ///     True to parse every line into a record instance immediately and release the raw lines;
        ///     false to keep only the raw lines so that records are parsed on demand.
        /// </param>
        /// <exception cref="FileNotFoundException">The file does not exist.</exception>
        public void LoadFile(string filename, string[] targetRecordTypes = null, bool parseAll = true)
        {
            // Previously loaded data is discarded first, even if the new file turns out to be missing.
            UnloadFile();

            bool fileIsMissing = !File.Exists(filename);

            if (fileIsMissing)
            {
                throw new FileNotFoundException("The PDB file specified was not found.", filename);
            }

            FileLinesArray    = File.ReadAllLines(filename);
            TargetRecordTypes = targetRecordTypes;

            if (!parseAll)
            {
                // Lazy mode: the raw lines stay in FileLinesArray.
                return;
            }

            ProteinDataBankFileRecordList = new List <ProteinDataBankFileRecord>();

            foreach (string fileLine in FileLinesArray)
            {
                // Lines filtered out by TargetRecordTypes come back as null and are still added,
                // so list positions keep matching line positions.
                ProteinDataBankFileRecordList.Add(ProteinDataBaseFileLineRecord(fileLine));
            }

            // Everything is parsed; the raw text is no longer needed.
            FileLinesArray = null;
        }
Example #2
0
        /// <summary>
        ///     ToString override.  Returns a string representation of the currently loaded records of the PDB file,
        ///     one record per line.  Records that are null (for example, filtered out by the target record types)
        ///     produce an empty line.
        /// </summary>
        /// <remarks>
        ///     This method reads the loaded data directly instead of going through <see cref="NextRecord" />,
        ///     because <see cref="NextRecord" /> advances the shared record cursor and may unload the file;
        ///     formatting the object must not change its state.
        /// </remarks>
        /// <returns>The text of all loaded records, or an empty string when nothing is loaded.</returns>
        public override string ToString()
        {
            var stringBuilder = new StringBuilder();

            if (ProteinDataBankFileRecordList != null)
            {
                // Fully parsed mode: the records already exist.
                foreach (ProteinDataBankFileRecord parsedRecord in ProteinDataBankFileRecordList)
                {
                    stringBuilder.AppendLine(parsedRecord != null ? parsedRecord.ToString() : string.Empty);
                }
            }
            else if (FileLinesArray != null)
            {
                // Lazy mode: parse each raw line on the fly without touching the record cursor.
                foreach (string fileLine in FileLinesArray)
                {
                    ProteinDataBankFileRecord parsedRecord = ProteinDataBaseFileLineRecord(fileLine);
                    stringBuilder.AppendLine(parsedRecord != null ? parsedRecord.ToString() : string.Empty);
                }
            }

            return stringBuilder.ToString();
        }
Example #3
0
        /// <summary>
        ///     Advances the record cursor by one and returns the record at the new position.
        /// </summary>
        /// <returns>
        ///     The record at the new cursor position: taken from the parsed record list when it covers that position,
        ///     otherwise parsed from the raw line at that position.  Returns null once the cursor has moved past the
        ///     loaded data (in which case the file is also unloaded); the returned record itself may also be null.
        /// </returns>
        public ProteinDataBankFileRecord NextRecord()
        {
            int position = ++_currentFileRecordNumber;

            // Prefer the already-parsed records.
            var parsedRecords = ProteinDataBankFileRecordList;
            bool inParsedRecords = parsedRecords != null && position < parsedRecords.Count;

            if (inParsedRecords)
            {
                return parsedRecords[position];
            }

            // Otherwise parse the raw line on demand.
            var rawLines = FileLinesArray;
            bool inRawLines = rawLines != null && position < rawLines.Length;

            if (inRawLines)
            {
                return ProteinDataBaseFileLineRecord(rawLines[position]);
            }

            // Nothing left to return: release the loaded data.
            UnloadFile();
            return null;
        }
Example #4
0
        /// <summary>
        ///     Counts the MODEL records in a PDB file.
        /// </summary>
        /// <param name="pdbFilename">Path of the PDB file to inspect.</param>
        /// <param name="maximumToFind">
        ///     When greater than -1, scanning stops as soon as the count exceeds this value
        ///     (so the result is at most <paramref name="maximumToFind" /> + 1).  -1 counts every MODEL record.
        /// </param>
        /// <returns>The number of MODEL records counted before scanning ended.</returns>
        /// <exception cref="FileNotFoundException">The file does not exist.</exception>
        public static int PdbModelCount(string pdbFilename, int maximumToFind = -1)
        {
            if (!File.Exists(pdbFilename))
            {
                throw new FileNotFoundException("File not found", pdbFilename);
            }

            // Only MODEL records are requested from the loader.
            var pdbFile = new ProteinDataBankFile(pdbFilename, new[]
            {
                MODEL_Record.MODEL_Field.FieldName
            });

            bool hasLimit    = maximumToFind > -1;
            int  modelsFound = 0;

            for (int recordIndex = 0; recordIndex < pdbFile.Count; recordIndex++)
            {
                ProteinDataBankFileRecord record = pdbFile.NextRecord();

                // Skip filtered-out lines (null) and anything that is not exactly a MODEL record.
                if (record == null || record.GetType() != typeof(MODEL_Record))
                {
                    continue;
                }

                modelsFound++;

                if (hasLimit && modelsFound > maximumToFind)
                {
                    break;
                }
            }

            return modelsFound;
        }
Example #5
0
        /// <summary>
        ///     Loads PDB data from lines that are already in memory, replacing whatever was loaded before.
        ///     When target record types are given, lines of any other record type are not turned into record instances.
        /// </summary>
        /// <param name="structureFileLines">The lines of a PDB file, one line per array element.</param>
        /// <param name="targetRecordTypes">Record type names to keep; null applies no filter.</param>
        /// <param name="parseAll">
        ///     True to parse every line into a record instance immediately and release the reference to the raw lines;
        ///     false to keep only the raw lines so that records are parsed on demand.
        /// </param>
        /// <exception cref="ArgumentNullException"><paramref name="structureFileLines" /> is null.</exception>
        public void LoadFile(string[] structureFileLines, string[] targetRecordTypes = null, bool parseAll = true)
        {
            // Validate before unloading so that a bad call does not destroy data that is already loaded
            // (previously a null array surfaced later as a NullReferenceException).
            if (structureFileLines == null)
            {
                throw new ArgumentNullException(nameof(structureFileLines));
            }

            UnloadFile();

            FileLinesArray    = structureFileLines;
            TargetRecordTypes = targetRecordTypes;

            if (!parseAll)
            {
                // Lazy mode: the raw lines stay in FileLinesArray.
                return;
            }

            ProteinDataBankFileRecordList = new List <ProteinDataBankFileRecord>(structureFileLines.Length);

            foreach (string fileLine in structureFileLines)
            {
                // Lines filtered out by TargetRecordTypes come back as null and are still added,
                // so list positions keep matching line positions.
                ProteinDataBankFileRecordList.Add(ProteinDataBaseFileLineRecord(fileLine));
            }

            // Everything is parsed; the raw text is no longer needed.
            FileLinesArray = null;
        }
Example #6
0
        /// <summary>
        ///     Returns a PDB record class instance of the appropriate type for the given line of PDB file.
        ///     The record type is obtained from ProteinDataBankFileLineRecordType and mapped to a record class.
        ///     JRNL and REMARK lines are further classified into sub-record classes by probing marker fields.
        /// </summary>
        /// <param name="columnFormatLine">A single line of a PDB file.</param>
        /// <returns>
        ///     The record instance for the line, or null when TargetRecordTypes is non-empty and does not contain
        ///     the line's record type, or when the record type matches none of the cases below.
        /// </returns>
        public ProteinDataBankFileRecord ProteinDataBaseFileLineRecord(string columnFormatLine)
        {
            ProteinDataBankFileRecord result = null;
            string recordType = ProteinDataBankFileLineRecordType(columnFormatLine);

            // Apply the record type filter: a null or empty TargetRecordTypes means "accept everything".
            if ((TargetRecordTypes != null) && (TargetRecordTypes.Length > 0) && (!TargetRecordTypes.Contains(recordType)))
            {
                return(result);
            }

            switch (recordType)
            {
            case "HEADER":
                result = new HEADER_Record(columnFormatLine);
                break;

            case "OBSLTE":
                result = new OBSLTE_Record(columnFormatLine);
                break;

            case "TITLE":
                result = new TITLE_Record(columnFormatLine);
                break;

            case "SPLIT":
                result = new SPLIT_Record(columnFormatLine);
                break;

            case "CAVEAT":
                result = new CAVEAT_Record(columnFormatLine);
                break;

            case "COMPND":
                result = new COMPND_Record(columnFormatLine);
                break;

            case "SOURCE":
                result = new SOURCE_Record(columnFormatLine);
                break;

            case "KEYWDS":
                result = new KEYWDS_Record(columnFormatLine);
                break;

            case "EXPDTA":
                result = new EXPDTA_Record(columnFormatLine);
                break;

            case "NUMMDL":
                result = new NUMMDL_Record(columnFormatLine);
                break;

            case "MDLTYP":
                result = new MDLTYP_Record(columnFormatLine);
                break;

            case "AUTHOR":
                result = new AUTHOR_Record(columnFormatLine);
                break;

            case "REVDAT":
                result = new REVDAT_Record(columnFormatLine);
                break;

            case "SPRSDE":
                result = new SPRSDE_Record(columnFormatLine);
                break;

            // JRNL lines: each candidate sub-record class is constructed from the line in turn, and the first
            // one whose marker field value(s) equal the expected field name(s) is kept.  The order of the
            // probes matters (e.g. the ISSN/ESSN form of REFN is tried before the plain REFN form).
            // If no candidate matches, the generic JRNL_Record is used.
            case "JRNL":
                result = new JRNL_DOI_Record(columnFormatLine);
                if (((JRNL_DOI_Record)result).DOI.FieldValue == JRNL_DOI_Record.DOI_Field.FieldName)
                {
                    break;
                }

                result = new JRNL_PMID_Record(columnFormatLine);
                if (((JRNL_PMID_Record)result).PMID.FieldValue == JRNL_PMID_Record.PMID_Field.FieldName)
                {
                    break;
                }

                result = new JRNL_PUBL_Record(columnFormatLine);
                if (((JRNL_PUBL_Record)result).PUBL.FieldValue == JRNL_PUBL_Record.PUBL_Field.FieldName)
                {
                    break;
                }

                // The ISSN/ESSN marker is compared case-insensitively here (via ToUpperInvariant).
                result = new JRNL_REFN_ISSN_or_ESSN_Record(columnFormatLine);
                if (((JRNL_REFN_ISSN_or_ESSN_Record)result).REFN.FieldValue == JRNL_REFN_ISSN_or_ESSN_Record.REFN_Field.FieldName && (((JRNL_REFN_ISSN_or_ESSN_Record)result).ISSN_or_ESSN.FieldValue.ToUpperInvariant() == "ISSN" || ((JRNL_REFN_ISSN_or_ESSN_Record)result).ISSN_or_ESSN.FieldValue.ToUpperInvariant() == "ESSN"))
                {
                    break;
                }

                result = new JRNL_REF_V_Record(columnFormatLine);
                if (((JRNL_REF_V_Record)result).REF.FieldValue == JRNL_REF_V_Record.REF_Field.FieldName && ((JRNL_REF_V_Record)result).V_.FieldValue == JRNL_REF_V_Record.V_Field.FieldName)
                {
                    break;
                }

                result = new JRNL_REFN_Record(columnFormatLine);
                if (((JRNL_REFN_Record)result).REFN.FieldValue == JRNL_REFN_Record.REFN_Field.FieldName)
                {
                    break;
                }

                result = new JRNL_REF_TOBEPUBLISHED_Record(columnFormatLine);
                if (((JRNL_REF_TOBEPUBLISHED_Record)result).REF.FieldValue == JRNL_REF_TOBEPUBLISHED_Record.REF_Field.FieldName)
                {
                    break;
                }

                // No specific JRNL form matched.
                result = new JRNL_Record(columnFormatLine);
                break;

            // REMARK lines: same probing scheme as JRNL.  Candidates for REMARK 1 and REMARK 2 sub-records are
            // tried in the order written; if none matches, the generic REMARK_Record is used.
            case "REMARK":
                result = new REMARK_1_AUTH_Record(columnFormatLine);
                if (((REMARK_1_AUTH_Record)result)._1.FieldValue == REMARK_1_AUTH_Record._1_Field.FieldName && ((REMARK_1_AUTH_Record)result).AUTH.FieldValue == REMARK_1_AUTH_Record.AUTH_Field.FieldName)
                {
                    break;
                }

                result = new REMARK_1_PUBL_Record(columnFormatLine);
                if (((REMARK_1_PUBL_Record)result)._1.FieldValue == REMARK_1_PUBL_Record._1_Field.FieldName && ((REMARK_1_PUBL_Record)result).PUBL.FieldValue == REMARK_1_PUBL_Record.PUBL_Field.FieldName)
                {
                    break;
                }

                result = new REMARK_1_REF_V_Record(columnFormatLine);
                if (((REMARK_1_REF_V_Record)result)._1.FieldValue == REMARK_1_REF_V_Record._1_Field.FieldName && ((REMARK_1_REF_V_Record)result).REF.FieldValue == REMARK_1_REF_V_Record.REF_Field.FieldName && ((REMARK_1_REF_V_Record)result).V_.FieldValue == REMARK_1_REF_V_Record.V_Field.FieldName)
                {
                    break;
                }

                result = new REMARK_1_REFERENCE_Record(columnFormatLine);
                if (((REMARK_1_REFERENCE_Record)result)._1.FieldValue == REMARK_1_REFERENCE_Record._1_Field.FieldName && ((REMARK_1_REFERENCE_Record)result).REFERENCE.FieldValue == REMARK_1_REFERENCE_Record.REFERENCE_Field.FieldName)
                {
                    break;
                }

                // NOTE(review): unlike the JRNL case above, the ISSN/ESSN marker is compared case-sensitively
                // here (no ToUpperInvariant) - confirm whether the difference is intentional.
                result = new REMARK_1_REFN_ISSN_or_ESSN_Record(columnFormatLine);
                if (((REMARK_1_REFN_ISSN_or_ESSN_Record)result)._1.FieldValue == REMARK_1_REFN_ISSN_or_ESSN_Record._1_Field.FieldName && ((REMARK_1_REFN_ISSN_or_ESSN_Record)result).REFN.FieldValue == REMARK_1_REFN_ISSN_or_ESSN_Record.REFN_Field.FieldName && (((REMARK_1_REFN_ISSN_or_ESSN_Record)result).ISSN_or_ESSN.FieldValue == "ISSN" || ((REMARK_1_REFN_ISSN_or_ESSN_Record)result).ISSN_or_ESSN.FieldValue == "ESSN"))
                {
                    break;
                }

                result = new REMARK_1_REFN_Record(columnFormatLine);
                if (((REMARK_1_REFN_Record)result)._1.FieldValue == REMARK_1_REFN_Record._1_Field.FieldName && ((REMARK_1_REFN_Record)result).REFN.FieldValue == REMARK_1_REFN_Record.REFN_Field.FieldName)
                {
                    break;
                }

                result = new REMARK_1_TITL_Record(columnFormatLine);
                if (((REMARK_1_TITL_Record)result)._1.FieldValue == REMARK_1_TITL_Record._1_Field.FieldName && ((REMARK_1_TITL_Record)result).TITL.FieldValue == REMARK_1_TITL_Record.TITL_Field.FieldName)
                {
                    break;
                }

                result = new REMARK_2_RESOLUTION_ANGSTROMS_Record(columnFormatLine);
                if (((REMARK_2_RESOLUTION_ANGSTROMS_Record)result)._2.FieldValue == REMARK_2_RESOLUTION_ANGSTROMS_Record._2_Field.FieldName && ((REMARK_2_RESOLUTION_ANGSTROMS_Record)result).RESOLUTION_.FieldValue == REMARK_2_RESOLUTION_ANGSTROMS_Record.RESOLUTION_Field.FieldName && ((REMARK_2_RESOLUTION_ANGSTROMS_Record)result).ANGSTROMS_.FieldValue == REMARK_2_RESOLUTION_ANGSTROMS_Record.ANGSTROMS_Field.FieldName)
                {
                    break;
                }

                result = new REMARK_2_RESOLUTION_NOTAPPLICABLE_Record(columnFormatLine);
                if (((REMARK_2_RESOLUTION_NOTAPPLICABLE_Record)result)._2.FieldValue == REMARK_2_RESOLUTION_NOTAPPLICABLE_Record._2_Field.FieldName && ((REMARK_2_RESOLUTION_NOTAPPLICABLE_Record)result).RESOLUTION_NOT_APPLICABLE_.FieldValue == REMARK_2_RESOLUTION_NOTAPPLICABLE_Record.RESOLUTION_NOT_APPLICABLE_Field.FieldName)
                {
                    break;
                }

                result = new REMARK_1_REF_TOBEPUBLISHED_Record(columnFormatLine);
                if (((REMARK_1_REF_TOBEPUBLISHED_Record)result)._1.FieldValue == REMARK_1_REF_TOBEPUBLISHED_Record._1_Field.FieldName && ((REMARK_1_REF_TOBEPUBLISHED_Record)result).REF.FieldValue == REMARK_1_REF_TOBEPUBLISHED_Record.REF_Field.FieldName && ((REMARK_1_REF_TOBEPUBLISHED_Record)result).TO_BE_PUBLISHED.FieldValue == REMARK_1_REF_TOBEPUBLISHED_Record.TO_BE_PUBLISHED_Field.FieldName)
                {
                    break;
                }

                // No specific REMARK form matched.
                result = new REMARK_Record(columnFormatLine);
                break;

            case "DBREF":
                result = new DBREF_Record(columnFormatLine);
                break;

            case "DBREF1":
                result = new DBREF1_Record(columnFormatLine);
                break;

            case "DBREF2":
                result = new DBREF2_Record(columnFormatLine);
                break;

            case "SEQADV":
                result = new SEQADV_Record(columnFormatLine);
                break;

            case "SEQRES":
                result = new SEQRES_Record(columnFormatLine);
                break;

            case "MODRES":
                result = new MODRES_Record(columnFormatLine);
                break;

            case "HET":
                result = new HET_Record(columnFormatLine);
                break;

            case "HETNAM":
                result = new HETNAM_Record(columnFormatLine);
                break;

            case "HETSYN":
                result = new HETSYN_Record(columnFormatLine);
                break;

            case "FORMUL":
                result = new FORMUL_Record(columnFormatLine);
                break;

            case "HELIX":
                result = new HELIX_Record(columnFormatLine);
                break;

            case "SHEET":
                result = new SHEET_Record(columnFormatLine);
                break;

            // SSBOND lines are always represented by the CYS-CYS record class.
            case "SSBOND":
                result = new SSBOND_CYS_CYS_Record(columnFormatLine);
                break;

            case "LINK":
                result = new LINK_Record(columnFormatLine);
                break;

            case "CISPEP":
                result = new CISPEP_Record(columnFormatLine);
                break;

            case "SITE":
                result = new SITE_Record(columnFormatLine);
                break;

            case "CRYST1":
                result = new CRYST1_Record(columnFormatLine);
                break;

            case "ORIGX1":
                result = new ORIGX1_Record(columnFormatLine);
                break;

            case "ORIGX2":
                result = new ORIGX2_Record(columnFormatLine);
                break;

            case "ORIGX3":
                result = new ORIGX3_Record(columnFormatLine);
                break;

            case "SCALE1":
                result = new SCALE1_Record(columnFormatLine);
                break;

            case "SCALE2":
                result = new SCALE2_Record(columnFormatLine);
                break;

            case "SCALE3":
                result = new SCALE3_Record(columnFormatLine);
                break;

            case "MTRIX1":
                result = new MTRIX1_Record(columnFormatLine);
                break;

            case "MTRIX2":
                result = new MTRIX2_Record(columnFormatLine);
                break;

            case "MTRIX3":
                result = new MTRIX3_Record(columnFormatLine);
                break;

            case "MODEL":
                result = new MODEL_Record(columnFormatLine);
                break;

            case "ATOM":
                result = new ATOM_Record(columnFormatLine);
                break;

            case "ANISOU":
                result = new ANISOU_Record(columnFormatLine);
                break;

            case "TER":
                result = new TER_Record(columnFormatLine);
                break;

            case "HETATM":
                result = new HETATM_Record(columnFormatLine);
                break;

            case "ENDMDL":
                result = new ENDMDL_Record(columnFormatLine);
                break;

            case "CONECT":
                result = new CONECT_Record(columnFormatLine);
                break;

            // MASTER lines are always represented by the MASTER_0 record class.
            case "MASTER":
                result = new MASTER_0_Record(columnFormatLine);
                break;

            case "END":
                result = new END_Record(columnFormatLine);
                break;
            }
            // Unrecognised record types fall through the switch and leave result as null.
            return(result);
        }
Example #7
0
        /// <summary>
        ///     Loads one PDB file and returns its atoms grouped by chain.
        ///     Only the first model is read (scanning stops at the first ENDMDL record), and scanning also stops once
        ///     the number of accepted TER records reaches <paramref name="maximumChains" />.
        ///     ATOM records with a valid amino acid code are kept per chain, ordered by serial number.  HETATM records
        ///     with a valid amino acid code are converted to ATOM records and appended to the same chain, but only when
        ///     no ATOM record of that chain has the same residue sequence number.
        /// </summary>
        /// <param name="pdbFilename">Path of the PDB file to read.</param>
        /// <param name="chainIdWhiteList">
        ///     Chain ids to keep, compared against the trimmed, upper-cased chain id of each record; null keeps every chain.
        /// </param>
        /// <param name="minimumChains">Minimum number of non-empty chains required for a result to be returned.</param>
        /// <param name="maximumChains">Maximum number of non-empty chains allowed for a result to be returned.</param>
        /// <param name="onlyCarbonAlphas">
        ///     True to keep only atoms whose trimmed, upper-cased name equals StaticValues.CarbonAlpha.
        /// </param>
        /// <returns>
        ///     A container with one entry per chain id encountered (ordered by chain id; entries may be empty),
        ///     or null when the number of non-empty chains is outside
        ///     [<paramref name="minimumChains" />, <paramref name="maximumChains" />].
        /// </returns>
        /// <exception cref="FileNotFoundException">The file does not exist.</exception>
        /// <exception cref="ArgumentOutOfRangeException">
        ///     <paramref name="minimumChains" /> is greater than <paramref name="maximumChains" />.
        /// </exception>
        public static ProteinChainListContainer PdbAtomicChains(string pdbFilename, string[] chainIdWhiteList, int minimumChains = 2, int maximumChains = 2, bool onlyCarbonAlphas = false)
        {
            // Check file exists.
            if (!File.Exists(pdbFilename))
            {
                throw new FileNotFoundException("File not found", pdbFilename);
            }

            // Check min chains not more than max chains.
            if (minimumChains > maximumChains)
            {
                throw new ArgumentOutOfRangeException(nameof(minimumChains));
            }

            // Load pdb/protein file, keeping only ATOM, HETATM, TER, MODEL and ENDMDL records.
            var proteinDataBankFile = new ProteinDataBankFile(pdbFilename, new[]
            {
                ATOM_Record.ATOM_Field.FieldName,
                HETATM_Record.HETATM_Field.FieldName,
                TER_Record.TER_Field.FieldName,
                MODEL_Record.MODEL_Field.FieldName,
                ENDMDL_Record.ENDMDL_Field.FieldName
            });

            var pdbFileChains = new ProteinChainListContainer();

            // Records grouped by normalized (trimmed, upper-cased) chain id.  A chain gets an entry as soon as one
            // of its records passes the filters, even if none of its records has a valid amino acid code.
            var atomRecordListDictionary    = new Dictionary <string, List <ATOM_Record> >();
            var hetAtomRecordListDictionary = new Dictionary <string, List <HETATM_Record> >();
            int terCount = 0;

            for (int proteinDataBankFileRecordIndex = 0; proteinDataBankFileRecordIndex < proteinDataBankFile.Count; proteinDataBankFileRecordIndex++)
            {
                ProteinDataBankFileRecord currentRecord = proteinDataBankFile.NextRecord();

                if (currentRecord == null)
                {
                    continue;
                }

                if (currentRecord.GetType() == typeof(ATOM_Record))
                {
                    var atom = (ATOM_Record)currentRecord;

                    if (onlyCarbonAlphas && atom.name.FieldValue.Trim().ToUpperInvariant() != StaticValues.CarbonAlpha)
                    {
                        continue;
                    }

                    string chainIdKey = atom.chainID.FieldValue.Trim().ToUpperInvariant();

                    if (chainIdWhiteList != null && !chainIdWhiteList.Contains(chainIdKey))
                    {
                        continue;
                    }

                    List <ATOM_Record> chainAtoms;

                    if (!atomRecordListDictionary.TryGetValue(chainIdKey, out chainAtoms))
                    {
                        chainAtoms = new List <ATOM_Record>();
                        atomRecordListDictionary.Add(chainIdKey, chainAtoms);
                    }

                    if (ParameterValidation.IsAminoAcidCodeValid(atom.resName.FieldValue))
                    {
                        chainAtoms.Add(atom);
                    }
                }
                else if (currentRecord.GetType() == typeof(HETATM_Record))
                {
                    var hetatm = (HETATM_Record)currentRecord;

                    if (onlyCarbonAlphas && hetatm.name.FieldValue.Trim().ToUpperInvariant() != StaticValues.CarbonAlpha)
                    {
                        continue;
                    }

                    string chainIdKey = hetatm.chainID.FieldValue.Trim().ToUpperInvariant();

                    if (chainIdWhiteList != null && !chainIdWhiteList.Contains(chainIdKey))
                    {
                        continue;
                    }

                    List <HETATM_Record> chainHetAtoms;

                    if (!hetAtomRecordListDictionary.TryGetValue(chainIdKey, out chainHetAtoms))
                    {
                        chainHetAtoms = new List <HETATM_Record>();
                        hetAtomRecordListDictionary.Add(chainIdKey, chainHetAtoms);
                    }

                    if (ParameterValidation.IsAminoAcidCodeValid(hetatm.resName.FieldValue))
                    {
                        chainHetAtoms.Add(hetatm);
                    }
                }
                else if (currentRecord.GetType() == typeof(TER_Record))
                {
                    var ter = (TER_Record)currentRecord;

                    string chainIdKey = ter.chainID.FieldValue.Trim().ToUpperInvariant();

                    if (chainIdWhiteList != null && !chainIdWhiteList.Contains(chainIdKey))
                    {
                        continue;
                    }

                    terCount++;

                    // Enough chain terminators have been seen; stop reading.
                    if (terCount >= maximumChains)
                    {
                        break;
                    }
                }
                else if (currentRecord.GetType() == typeof(ENDMDL_Record))
                {
                    // Only the first model is used.
                    break;
                }
            }

            // file has been parsed so clear used file data from memory as soon as possible
            proteinDataBankFile.UnloadFile();

            // Build ONE container per chain id seen in either dictionary.  Previously each dictionary was indexed by
            // its own sorted position, so when the ATOM and HETATM chain id sets differed (e.g. ATOM chains A,B and
            // HETATM chain B only) HETATM atoms were appended to the wrong chain.
            List <string> chainIds = atomRecordListDictionary.Keys
                                     .Union(hetAtomRecordListDictionary.Keys)
                                     .OrderBy(chainId => chainId)
                                     .ToList();

            foreach (string chainId in chainIds)
            {
                var chain = new ProteinAtomListContainer();
                pdbFileChains.ChainList.Add(chain);

                List <ATOM_Record> chainAtoms;
                bool chainHasAtoms = atomRecordListDictionary.TryGetValue(chainId, out chainAtoms) && chainAtoms.Count > 0;

                if (chainHasAtoms)
                {
                    chain.AtomList = chainAtoms.OrderBy(a => NullableTryParseInt32(a.serial.FieldValue)).ToList();
                }

                List <HETATM_Record> chainHetAtoms;

                if (!hetAtomRecordListDictionary.TryGetValue(chainId, out chainHetAtoms) || chainHetAtoms.Count == 0)
                {
                    continue;
                }

                // Residue sequence numbers already covered by ATOM records of this chain.
                var atomResidueSequences = new HashSet <string>();

                if (chainHasAtoms)
                {
                    foreach (ATOM_Record chainAtom in chainAtoms)
                    {
                        atomResidueSequences.Add(chainAtom.resSeq.FieldValue);
                    }
                }

                foreach (HETATM_Record hetatm in chainHetAtoms.OrderBy(a => NullableTryParseInt32(a.serial.FieldValue)))
                {
                    // A HETATM is only added when no ATOM of the same chain occupies that residue position.
                    if (atomResidueSequences.Contains(hetatm.resSeq.FieldValue))
                    {
                        continue;
                    }

                    // The container's list may not have been created yet for a chain without ATOM records.
                    if (chain.AtomList == null)
                    {
                        chain.AtomList = new List <ATOM_Record>();
                    }

                    chain.AtomList.Add(ConvertHetatmRecordToAtomRecord(hetatm));
                }
            }

            int nonEmptyChainCount = pdbFileChains.ChainList.Count(a => a != null && a.AtomList != null && a.AtomList.Count > 0);

            if (nonEmptyChainCount >= minimumChains && nonEmptyChainCount <= maximumChains)
            {
                return(pdbFileChains);
            }

            // Chain count is outside the requested range; callers receive null rather than an exception.
            return(null);
        }
Example #8
0
        /// <summary>
        ///     Counts the distinct, non-blank chain ids found on the ATOM and HETATM records of a PDB file.
        ///     Scanning stops at the first ENDMDL record.
        /// </summary>
        /// <param name="pdbFilename">Path of the PDB file to inspect.</param>
        /// <param name="chainIdWhiteList">
        ///     Chain ids to consider, compared against the trimmed, upper-cased chain id of each record; null considers every chain.
        /// </param>
        /// <param name="maximumToFind">
        ///     When greater than -1, scanning stops as soon as the number of chain ids found exceeds this value
        ///     (so the result is at most <paramref name="maximumToFind" /> + 1).  -1 scans without a limit.
        /// </param>
        /// <returns>The number of distinct chain ids found (chain ids are counted as written in the file, not normalized).</returns>
        /// <exception cref="FileNotFoundException">The file does not exist.</exception>
        public static int PdbAtomicChainsCount(string pdbFilename, string[] chainIdWhiteList = null, int maximumToFind = -1)
        {
            if (!File.Exists(pdbFilename))
            {
                throw new FileNotFoundException("File not found", pdbFilename);
            }

            // Only ATOM, HETATM, MODEL and ENDMDL records are requested from the loader (TER is not).
            var pdbFile = new ProteinDataBankFile(pdbFilename, new[]
            {
                ATOM_Record.ATOM_Field.FieldName,
                HETATM_Record.HETATM_Field.FieldName,
                MODEL_Record.MODEL_Field.FieldName,
                ENDMDL_Record.ENDMDL_Field.FieldName
            });

            bool hasLimit     = maximumToFind > -1;
            var  seenChainIds = new HashSet <string>();

            for (int recordIndex = 0; recordIndex < pdbFile.Count; recordIndex++)
            {
                ProteinDataBankFileRecord record = pdbFile.NextRecord();

                if (record == null)
                {
                    continue;
                }

                var recordType = record.GetType();

                if (recordType == typeof(ENDMDL_Record))
                {
                    break;
                }

                string rawChainId;
                bool   checkLimit;

                if (recordType == typeof(ATOM_Record))
                {
                    rawChainId = ((ATOM_Record)record).chainID.FieldValue;
                    checkLimit = true;
                }
                else if (recordType == typeof(HETATM_Record))
                {
                    rawChainId = ((HETATM_Record)record).chainID.FieldValue;
                    checkLimit = true;
                }
                else if (recordType == typeof(TER_Record))
                {
                    // NOTE(review): TER is not among the requested record types above, so this branch is
                    // presumably never reached - confirm against the loader.  TER never triggers the limit.
                    rawChainId = ((TER_Record)record).chainID.FieldValue;
                    checkLimit = false;
                }
                else
                {
                    continue;
                }

                // The white list is matched on the normalized id; the count uses the id as written.
                string normalizedChainId = rawChainId.Trim().ToUpperInvariant();

                if (chainIdWhiteList != null && !chainIdWhiteList.Contains(normalizedChainId))
                {
                    continue;
                }

                // Add returns false for ids that were already recorded.
                if (string.IsNullOrWhiteSpace(rawChainId) || !seenChainIds.Add(rawChainId))
                {
                    continue;
                }

                if (checkLimit && hasLimit && seenChainIds.Count > maximumToFind)
                {
                    break;
                }
            }

            return seenChainIds.Count;
        }
Example #9
0
        /// <summary>
        ///     Collects the residue names found on the ATOM and HETATM records of a PDB file, in file order.
        /// </summary>
        /// <param name="pdbFilename">Path of the PDB file to inspect.</param>
        /// <param name="chainIdWhiteList">
        ///     Chain ids to consider, compared against the trimmed, upper-cased chain id of each record; null considers every chain.
        /// </param>
        /// <param name="onlyCarbonAlphas">
        ///     True to consider only atoms whose trimmed, upper-cased name equals StaticValues.CarbonAlpha.
        /// </param>
        /// <param name="distinct">True to list each residue name once; false to list one entry per accepted record.</param>
        /// <returns>The residue names of the accepted records.</returns>
        /// <exception cref="FileNotFoundException">The file does not exist.</exception>
        public static List <string> PdbAtomAcidList(string pdbFilename, string[] chainIdWhiteList = null, bool onlyCarbonAlphas = true, bool distinct = true)
        {
            if (!File.Exists(pdbFilename))
            {
                throw new FileNotFoundException("File not found", pdbFilename);
            }

            // Only ATOM and HETATM records are requested from the loader.
            var pdbFile = new ProteinDataBankFile(pdbFilename, new[]
            {
                ATOM_Record.ATOM_Field.FieldName,
                HETATM_Record.HETATM_Field.FieldName,
            });

            var residueNames = new List <string>();

            for (int recordIndex = 0; recordIndex < pdbFile.Count; recordIndex++)
            {
                ProteinDataBankFileRecord record = pdbFile.NextRecord();

                if (record == null)
                {
                    continue;
                }

                var    recordType = record.GetType();
                string residueName;

                if (recordType == typeof(ATOM_Record))
                {
                    var atomRecord = (ATOM_Record)record;

                    string chainId       = atomRecord.chainID.FieldValue.Trim().ToUpperInvariant();
                    bool   chainExcluded = chainIdWhiteList != null && !chainIdWhiteList.Contains(chainId);

                    if (chainExcluded || (onlyCarbonAlphas && atomRecord.name.FieldValue.Trim().ToUpperInvariant() != StaticValues.CarbonAlpha))
                    {
                        continue;
                    }

                    residueName = atomRecord.resName.FieldValue;
                }
                else if (recordType == typeof(HETATM_Record))
                {
                    var hetAtomRecord = (HETATM_Record)record;

                    string chainId       = hetAtomRecord.chainID.FieldValue.Trim().ToUpperInvariant();
                    bool   chainExcluded = chainIdWhiteList != null && !chainIdWhiteList.Contains(chainId);

                    if (chainExcluded || (onlyCarbonAlphas && hetAtomRecord.name.FieldValue.Trim().ToUpperInvariant() != StaticValues.CarbonAlpha))
                    {
                        continue;
                    }

                    residueName = hetAtomRecord.resName.FieldValue;
                }
                else
                {
                    continue;
                }

                // In distinct mode a residue name that is already listed is not added again.
                if (distinct && residueNames.Contains(residueName))
                {
                    continue;
                }

                residueNames.Add(residueName);
            }

            return residueNames;
        }