/// <summary>
/// Reads a PDB file from disk into this instance. <c>UnloadFile()</c> is called first, then every
/// line of the file is read. With <paramref name="parseAll"/> set, each line is converted through
/// <c>ProteinDataBaseFileLineRecord</c> into <c>ProteinDataBankFileRecordList</c> and the raw lines
/// are released; otherwise the raw lines stay in <c>FileLinesArray</c> unparsed.
/// </summary>
/// <remarks>
/// <c>UnloadFile()</c> runs before the existence check, so it has already been applied to this
/// instance when the call fails with <see cref="FileNotFoundException"/>.
/// </remarks>
/// <param name="filename">Path of the PDB file to read.</param>
/// <param name="targetRecordTypes">
/// Record type names stored in <c>TargetRecordTypes</c>; <c>ProteinDataBaseFileLineRecord</c> returns
/// null for lines whose type is not listed. Null or empty means no filtering.
/// </param>
/// <param name="parseAll">True to parse every line immediately; false to keep the raw lines only.</param>
/// <exception cref="FileNotFoundException">The file named by <paramref name="filename"/> does not exist.</exception>
public void LoadFile(string filename, string[] targetRecordTypes = null, bool parseAll = true)
{
    UnloadFile();

    bool fileIsMissing = !File.Exists(filename);
    if (fileIsMissing)
    {
        throw new FileNotFoundException("The PDB file specified was not found.", filename);
    }

    FileLinesArray = File.ReadAllLines(filename);
    TargetRecordTypes = targetRecordTypes;

    if (!parseAll)
    {
        // Raw lines are kept as they are; nothing is parsed here.
        return;
    }

    ProteinDataBankFileRecordList = new List<ProteinDataBankFileRecord>();
    foreach (string rawLine in FileLinesArray)
    {
        // Filtered-out or unrecognised lines come back as null and are stored as null,
        // so list positions keep matching line positions.
        ProteinDataBankFileRecordList.Add(ProteinDataBaseFileLineRecord(rawLine));
    }

    // Everything is parsed; the raw text is no longer needed.
    FileLinesArray = null;
}
/// <summary>
/// Builds a text dump of the loaded records, one line per record, by calling
/// <c>NextRecord()</c> up to <c>Count</c> times. A null record produces an empty line.
/// </summary>
/// <remarks>
/// Because it goes through <c>NextRecord()</c>, this call advances the instance's record cursor;
/// it starts from wherever the cursor currently is rather than from the first record.
/// </remarks>
/// <returns>The string form of each record returned by <c>NextRecord()</c>, newline-terminated.</returns>
public override string ToString()
{
    var text = new StringBuilder();

    // Count is re-read on every pass on purpose: NextRecord() may call UnloadFile().
    int emitted = 0;
    while (emitted < Count)
    {
        ProteinDataBankFileRecord record = NextRecord();
        if (record != null)
        {
            text.AppendLine(record.ToString());
        }
        else
        {
            text.AppendLine();
        }

        emitted++;
    }

    return text.ToString();
}
/// <summary>
/// Advances the record cursor by one and returns the record at the new position.
/// </summary>
/// <remarks>
/// The already-parsed <c>ProteinDataBankFileRecordList</c> is consulted first; if the position is
/// not available there, the raw line in <c>FileLinesArray</c> is parsed on demand. When neither
/// source has an entry at the new position, <c>UnloadFile()</c> is called and null is returned.
/// </remarks>
/// <returns>
/// The record at the new cursor position. Null is returned both when the cursor has moved past the
/// available data and when the entry itself is null (for example a line rejected by
/// <c>ProteinDataBaseFileLineRecord</c>).
/// </returns>
public ProteinDataBankFileRecord NextRecord()
{
    int position = ++_currentFileRecordNumber;

    bool inParsedList = ProteinDataBankFileRecordList != null
                        && position < ProteinDataBankFileRecordList.Count;
    if (inParsedList)
    {
        return ProteinDataBankFileRecordList[position];
    }

    bool inRawLines = FileLinesArray != null && position < FileLinesArray.Length;
    if (inRawLines)
    {
        // Lazy mode: the line is parsed only now that it is requested.
        return ProteinDataBaseFileLineRecord(FileLinesArray[position]);
    }

    // Nothing at this position in either source.
    UnloadFile();
    return null;
}
/// <summary>
/// Counts the MODEL records in a PDB file.
/// </summary>
/// <param name="pdbFilename">Path of the PDB file to inspect.</param>
/// <param name="maximumToFind">
/// Optional limit. When greater than -1, scanning stops as soon as the count exceeds this value,
/// so the result is never more than <c>maximumToFind + 1</c>. -1 (the default) means no limit.
/// </param>
/// <returns>The number of MODEL records seen before the scan ended.</returns>
/// <exception cref="FileNotFoundException">The file named by <paramref name="pdbFilename"/> does not exist.</exception>
public static int PdbModelCount(string pdbFilename, int maximumToFind = -1)
{
    if (!File.Exists(pdbFilename))
    {
        throw new FileNotFoundException("File not found", pdbFilename);
    }

    // Only MODEL records are requested from the loader.
    var pdbFile = new ProteinDataBankFile(pdbFilename, new[]
    {
        MODEL_Record.MODEL_Field.FieldName
    });

    bool hasLimit = maximumToFind > -1;
    int modelsFound = 0;

    int step = 0;
    while (step < pdbFile.Count)
    {
        step++;

        ProteinDataBankFileRecord record = pdbFile.NextRecord();
        if (record == null || record.GetType() != typeof(MODEL_Record))
        {
            continue;
        }

        modelsFound++;

        if (hasLimit && modelsFound > maximumToFind)
        {
            break;
        }
    }

    return modelsFound;
}
/// <summary>
/// Loads PDB content from lines that are already in memory. <c>UnloadFile()</c> is called first.
/// With <paramref name="parseAll"/> set, each line is converted through
/// <c>ProteinDataBaseFileLineRecord</c> into <c>ProteinDataBankFileRecordList</c> and the raw lines
/// are released; otherwise the lines are kept in <c>FileLinesArray</c> unparsed.
/// </summary>
/// <param name="structureFileLines">The lines of a PDB file, one line per element.</param>
/// <param name="targetRecordTypes">
/// Record type names stored in <c>TargetRecordTypes</c>; <c>ProteinDataBaseFileLineRecord</c> returns
/// null for lines whose type is not listed. Null or empty means no filtering.
/// </param>
/// <param name="parseAll">True to parse every line immediately; false to keep the raw lines only.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="structureFileLines"/> is null while <paramref name="parseAll"/> is true.
/// </exception>
public void LoadFile(string[] structureFileLines, string[] targetRecordTypes = null, bool parseAll = true)
{
    // Parsing a null array used to fail with a NullReferenceException after the previous state had
    // already been cleared. Reject it up front instead, before anything is modified. A null array
    // with parseAll == false is still accepted, as before.
    if (parseAll && structureFileLines == null)
    {
        throw new ArgumentNullException(nameof(structureFileLines));
    }

    UnloadFile();

    FileLinesArray = structureFileLines;
    TargetRecordTypes = targetRecordTypes;

    if (!parseAll)
    {
        return;
    }

    ProteinDataBankFileRecordList = new List<ProteinDataBankFileRecord>();
    foreach (string rawLine in FileLinesArray)
    {
        // Filtered-out or unrecognised lines are stored as null so positions match line positions.
        ProteinDataBankFileRecordList.Add(ProteinDataBaseFileLineRecord(rawLine));
    }

    // Everything is parsed; drop the reference to the raw text.
    FileLinesArray = null;
}
/// <summary>
/// Creates the record object that matches one line of a PDB file.
/// </summary>
/// <remarks>
/// The record type is obtained from <c>ProteinDataBankFileLineRecordType</c>. When
/// <c>TargetRecordTypes</c> is non-empty and does not contain that type, the line is skipped.
/// JRNL and REMARK lines have several sub-formats; candidates are tried in a fixed order and the
/// first one whose marker fields equal their expected field names wins, with the generic
/// <c>JRNL_Record</c> / <c>REMARK_Record</c> as the fallback. The order is significant: more
/// specific formats must be tried before less specific ones that share a marker.
/// The ISSN/ESSN marker is compared ignoring case (ordinal) for both JRNL and REMARK 1 lines.
/// </remarks>
/// <param name="columnFormatLine">One column-formatted line of a PDB file.</param>
/// <returns>
/// The record instance for the line, or null when the record type is filtered out by
/// <c>TargetRecordTypes</c> or is not one of the types handled here.
/// </returns>
public ProteinDataBankFileRecord ProteinDataBaseFileLineRecord(string columnFormatLine)
{
    string recordType = ProteinDataBankFileLineRecordType(columnFormatLine);

    bool filterIsActive = TargetRecordTypes != null && TargetRecordTypes.Length > 0;
    if (filterIsActive && !TargetRecordTypes.Contains(recordType))
    {
        return null;
    }

    switch (recordType)
    {
        case "HEADER": return new HEADER_Record(columnFormatLine);
        case "OBSLTE": return new OBSLTE_Record(columnFormatLine);
        case "TITLE": return new TITLE_Record(columnFormatLine);
        case "SPLIT": return new SPLIT_Record(columnFormatLine);
        case "CAVEAT": return new CAVEAT_Record(columnFormatLine);
        case "COMPND": return new COMPND_Record(columnFormatLine);
        case "SOURCE": return new SOURCE_Record(columnFormatLine);
        case "KEYWDS": return new KEYWDS_Record(columnFormatLine);
        case "EXPDTA": return new EXPDTA_Record(columnFormatLine);
        case "NUMMDL": return new NUMMDL_Record(columnFormatLine);
        case "MDLTYP": return new MDLTYP_Record(columnFormatLine);
        case "AUTHOR": return new AUTHOR_Record(columnFormatLine);
        case "REVDAT": return new REVDAT_Record(columnFormatLine);
        case "SPRSDE": return new SPRSDE_Record(columnFormatLine);

        case "JRNL":
        {
            var doi = new JRNL_DOI_Record(columnFormatLine);
            if (doi.DOI.FieldValue == JRNL_DOI_Record.DOI_Field.FieldName)
            {
                return doi;
            }

            var pmid = new JRNL_PMID_Record(columnFormatLine);
            if (pmid.PMID.FieldValue == JRNL_PMID_Record.PMID_Field.FieldName)
            {
                return pmid;
            }

            var publ = new JRNL_PUBL_Record(columnFormatLine);
            if (publ.PUBL.FieldValue == JRNL_PUBL_Record.PUBL_Field.FieldName)
            {
                return publ;
            }

            var refnIssn = new JRNL_REFN_ISSN_or_ESSN_Record(columnFormatLine);
            if (refnIssn.REFN.FieldValue == JRNL_REFN_ISSN_or_ESSN_Record.REFN_Field.FieldName
                && (string.Equals(refnIssn.ISSN_or_ESSN.FieldValue, "ISSN", StringComparison.OrdinalIgnoreCase)
                    || string.Equals(refnIssn.ISSN_or_ESSN.FieldValue, "ESSN", StringComparison.OrdinalIgnoreCase)))
            {
                return refnIssn;
            }

            var refV = new JRNL_REF_V_Record(columnFormatLine);
            if (refV.REF.FieldValue == JRNL_REF_V_Record.REF_Field.FieldName
                && refV.V_.FieldValue == JRNL_REF_V_Record.V_Field.FieldName)
            {
                return refV;
            }

            // Plain REFN is tried only after the ISSN/ESSN variant has been ruled out.
            var refn = new JRNL_REFN_Record(columnFormatLine);
            if (refn.REFN.FieldValue == JRNL_REFN_Record.REFN_Field.FieldName)
            {
                return refn;
            }

            var refToBePublished = new JRNL_REF_TOBEPUBLISHED_Record(columnFormatLine);
            if (refToBePublished.REF.FieldValue == JRNL_REF_TOBEPUBLISHED_Record.REF_Field.FieldName)
            {
                return refToBePublished;
            }

            return new JRNL_Record(columnFormatLine);
        }

        case "REMARK":
        {
            var auth = new REMARK_1_AUTH_Record(columnFormatLine);
            if (auth._1.FieldValue == REMARK_1_AUTH_Record._1_Field.FieldName
                && auth.AUTH.FieldValue == REMARK_1_AUTH_Record.AUTH_Field.FieldName)
            {
                return auth;
            }

            var publ = new REMARK_1_PUBL_Record(columnFormatLine);
            if (publ._1.FieldValue == REMARK_1_PUBL_Record._1_Field.FieldName
                && publ.PUBL.FieldValue == REMARK_1_PUBL_Record.PUBL_Field.FieldName)
            {
                return publ;
            }

            var refV = new REMARK_1_REF_V_Record(columnFormatLine);
            if (refV._1.FieldValue == REMARK_1_REF_V_Record._1_Field.FieldName
                && refV.REF.FieldValue == REMARK_1_REF_V_Record.REF_Field.FieldName
                && refV.V_.FieldValue == REMARK_1_REF_V_Record.V_Field.FieldName)
            {
                return refV;
            }

            var reference = new REMARK_1_REFERENCE_Record(columnFormatLine);
            if (reference._1.FieldValue == REMARK_1_REFERENCE_Record._1_Field.FieldName
                && reference.REFERENCE.FieldValue == REMARK_1_REFERENCE_Record.REFERENCE_Field.FieldName)
            {
                return reference;
            }

            // Same case-insensitive ISSN/ESSN test as the JRNL branch (it used to be case-sensitive
            // here only, so the two branches classified the same REFN text differently).
            var refnIssn = new REMARK_1_REFN_ISSN_or_ESSN_Record(columnFormatLine);
            if (refnIssn._1.FieldValue == REMARK_1_REFN_ISSN_or_ESSN_Record._1_Field.FieldName
                && refnIssn.REFN.FieldValue == REMARK_1_REFN_ISSN_or_ESSN_Record.REFN_Field.FieldName
                && (string.Equals(refnIssn.ISSN_or_ESSN.FieldValue, "ISSN", StringComparison.OrdinalIgnoreCase)
                    || string.Equals(refnIssn.ISSN_or_ESSN.FieldValue, "ESSN", StringComparison.OrdinalIgnoreCase)))
            {
                return refnIssn;
            }

            var refn = new REMARK_1_REFN_Record(columnFormatLine);
            if (refn._1.FieldValue == REMARK_1_REFN_Record._1_Field.FieldName
                && refn.REFN.FieldValue == REMARK_1_REFN_Record.REFN_Field.FieldName)
            {
                return refn;
            }

            var titl = new REMARK_1_TITL_Record(columnFormatLine);
            if (titl._1.FieldValue == REMARK_1_TITL_Record._1_Field.FieldName
                && titl.TITL.FieldValue == REMARK_1_TITL_Record.TITL_Field.FieldName)
            {
                return titl;
            }

            var resolution = new REMARK_2_RESOLUTION_ANGSTROMS_Record(columnFormatLine);
            if (resolution._2.FieldValue == REMARK_2_RESOLUTION_ANGSTROMS_Record._2_Field.FieldName
                && resolution.RESOLUTION_.FieldValue == REMARK_2_RESOLUTION_ANGSTROMS_Record.RESOLUTION_Field.FieldName
                && resolution.ANGSTROMS_.FieldValue == REMARK_2_RESOLUTION_ANGSTROMS_Record.ANGSTROMS_Field.FieldName)
            {
                return resolution;
            }

            var resolutionNotApplicable = new REMARK_2_RESOLUTION_NOTAPPLICABLE_Record(columnFormatLine);
            if (resolutionNotApplicable._2.FieldValue == REMARK_2_RESOLUTION_NOTAPPLICABLE_Record._2_Field.FieldName
                && resolutionNotApplicable.RESOLUTION_NOT_APPLICABLE_.FieldValue == REMARK_2_RESOLUTION_NOTAPPLICABLE_Record.RESOLUTION_NOT_APPLICABLE_Field.FieldName)
            {
                return resolutionNotApplicable;
            }

            var refToBePublished = new REMARK_1_REF_TOBEPUBLISHED_Record(columnFormatLine);
            if (refToBePublished._1.FieldValue == REMARK_1_REF_TOBEPUBLISHED_Record._1_Field.FieldName
                && refToBePublished.REF.FieldValue == REMARK_1_REF_TOBEPUBLISHED_Record.REF_Field.FieldName
                && refToBePublished.TO_BE_PUBLISHED.FieldValue == REMARK_1_REF_TOBEPUBLISHED_Record.TO_BE_PUBLISHED_Field.FieldName)
            {
                return refToBePublished;
            }

            return new REMARK_Record(columnFormatLine);
        }

        case "DBREF": return new DBREF_Record(columnFormatLine);
        case "DBREF1": return new DBREF1_Record(columnFormatLine);
        case "DBREF2": return new DBREF2_Record(columnFormatLine);
        case "SEQADV": return new SEQADV_Record(columnFormatLine);
        case "SEQRES": return new SEQRES_Record(columnFormatLine);
        case "MODRES": return new MODRES_Record(columnFormatLine);
        case "HET": return new HET_Record(columnFormatLine);
        case "HETNAM": return new HETNAM_Record(columnFormatLine);
        case "HETSYN": return new HETSYN_Record(columnFormatLine);
        case "FORMUL": return new FORMUL_Record(columnFormatLine);
        case "HELIX": return new HELIX_Record(columnFormatLine);
        case "SHEET": return new SHEET_Record(columnFormatLine);
        case "SSBOND": return new SSBOND_CYS_CYS_Record(columnFormatLine);
        case "LINK": return new LINK_Record(columnFormatLine);
        case "CISPEP": return new CISPEP_Record(columnFormatLine);
        case "SITE": return new SITE_Record(columnFormatLine);
        case "CRYST1": return new CRYST1_Record(columnFormatLine);
        case "ORIGX1": return new ORIGX1_Record(columnFormatLine);
        case "ORIGX2": return new ORIGX2_Record(columnFormatLine);
        case "ORIGX3": return new ORIGX3_Record(columnFormatLine);
        case "SCALE1": return new SCALE1_Record(columnFormatLine);
        case "SCALE2": return new SCALE2_Record(columnFormatLine);
        case "SCALE3": return new SCALE3_Record(columnFormatLine);
        case "MTRIX1": return new MTRIX1_Record(columnFormatLine);
        case "MTRIX2": return new MTRIX2_Record(columnFormatLine);
        case "MTRIX3": return new MTRIX3_Record(columnFormatLine);
        case "MODEL": return new MODEL_Record(columnFormatLine);
        case "ATOM": return new ATOM_Record(columnFormatLine);
        case "ANISOU": return new ANISOU_Record(columnFormatLine);
        case "TER": return new TER_Record(columnFormatLine);
        case "HETATM": return new HETATM_Record(columnFormatLine);
        case "ENDMDL": return new ENDMDL_Record(columnFormatLine);
        case "CONECT": return new CONECT_Record(columnFormatLine);
        case "MASTER": return new MASTER_0_Record(columnFormatLine);
        case "END": return new END_Record(columnFormatLine);
    }

    // Record type not handled above.
    return null;
}
/// <summary>
/// Loads one PDB file and returns its atoms grouped by chain.
/// </summary>
/// <remarks>
/// Only ATOM, HETATM, TER, MODEL and ENDMDL records are requested from the loader. Reading stops
/// at the first ENDMDL record, or once the number of accepted TER records reaches
/// <paramref name="maximumChains"/>. Records whose residue name fails
/// <c>ParameterValidation.IsAminoAcidCodeValid</c> are dropped.
/// Chains are keyed by the trimmed, upper-cased chain ID and returned in sorted key order, one
/// container per chain ID found in either the ATOM or the HETATM records. Within a chain, ATOM
/// records come first (ordered by serial), followed by those HETATM records (ordered by serial,
/// converted with <c>ConvertHetatmRecordToAtomRecord</c>) whose residue sequence number is not
/// already present among the chain's ATOM records.
/// </remarks>
/// <param name="pdbFilename">Path of the PDB file to read.</param>
/// <param name="chainIdWhiteList">
/// Chain IDs to keep, compared against the trimmed, upper-cased chain ID of each record.
/// Null keeps every chain.
/// </param>
/// <param name="minimumChains">Smallest acceptable number of non-empty chains.</param>
/// <param name="maximumChains">Largest acceptable number of non-empty chains.</param>
/// <param name="onlyCarbonAlphas">
/// When true, only atoms whose trimmed, upper-cased name equals <c>StaticValues.CarbonAlpha</c> are kept.
/// </param>
/// <returns>
/// The chains, or null when the number of non-empty chains is outside
/// [<paramref name="minimumChains"/>, <paramref name="maximumChains"/>].
/// </returns>
/// <exception cref="FileNotFoundException">The file named by <paramref name="pdbFilename"/> does not exist.</exception>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="minimumChains"/> is greater than <paramref name="maximumChains"/>.
/// </exception>
public static ProteinChainListContainer PdbAtomicChains(string pdbFilename, string[] chainIdWhiteList, int minimumChains = 2, int maximumChains = 2, bool onlyCarbonAlphas = false)
{
    if (!File.Exists(pdbFilename))
    {
        throw new FileNotFoundException("File not found", pdbFilename);
    }

    if (minimumChains > maximumChains)
    {
        throw new ArgumentOutOfRangeException(nameof(minimumChains));
    }

    var proteinDataBankFile = new ProteinDataBankFile(pdbFilename, new[]
    {
        ATOM_Record.ATOM_Field.FieldName,
        HETATM_Record.HETATM_Field.FieldName,
        TER_Record.TER_Field.FieldName,
        MODEL_Record.MODEL_Field.FieldName,
        ENDMDL_Record.ENDMDL_Field.FieldName
    });

    // Records per chain, keyed by the normalised chain ID. A key is created as soon as a record of
    // that chain passes the name/whitelist filters, even if the record itself is then rejected.
    var atomsByChain = new Dictionary<string, List<ATOM_Record>>();
    var hetAtomsByChain = new Dictionary<string, List<HETATM_Record>>();
    int terCount = 0;

    for (int recordIndex = 0; recordIndex < proteinDataBankFile.Count; recordIndex++)
    {
        ProteinDataBankFileRecord currentRecord = proteinDataBankFile.NextRecord();
        if (currentRecord == null)
        {
            continue;
        }

        if (currentRecord.GetType() == typeof(ATOM_Record))
        {
            var atom = (ATOM_Record)currentRecord;

            if (onlyCarbonAlphas && atom.name.FieldValue.Trim().ToUpperInvariant() != StaticValues.CarbonAlpha)
            {
                continue;
            }

            string chainIdKey = atom.chainID.FieldValue.Trim().ToUpperInvariant();
            if (chainIdWhiteList != null && !chainIdWhiteList.Contains(chainIdKey))
            {
                continue;
            }

            List<ATOM_Record> chainAtoms;
            if (!atomsByChain.TryGetValue(chainIdKey, out chainAtoms))
            {
                chainAtoms = new List<ATOM_Record>();
                atomsByChain.Add(chainIdKey, chainAtoms);
            }

            if (ParameterValidation.IsAminoAcidCodeValid(atom.resName.FieldValue))
            {
                chainAtoms.Add(atom);
            }
        }
        else if (currentRecord.GetType() == typeof(HETATM_Record))
        {
            var hetatm = (HETATM_Record)currentRecord;

            if (onlyCarbonAlphas && hetatm.name.FieldValue.Trim().ToUpperInvariant() != StaticValues.CarbonAlpha)
            {
                continue;
            }

            string chainIdKey = hetatm.chainID.FieldValue.Trim().ToUpperInvariant();
            if (chainIdWhiteList != null && !chainIdWhiteList.Contains(chainIdKey))
            {
                continue;
            }

            List<HETATM_Record> chainHetAtoms;
            if (!hetAtomsByChain.TryGetValue(chainIdKey, out chainHetAtoms))
            {
                chainHetAtoms = new List<HETATM_Record>();
                hetAtomsByChain.Add(chainIdKey, chainHetAtoms);
            }

            if (ParameterValidation.IsAminoAcidCodeValid(hetatm.resName.FieldValue))
            {
                chainHetAtoms.Add(hetatm);
            }
        }
        else if (currentRecord.GetType() == typeof(TER_Record))
        {
            var ter = (TER_Record)currentRecord;

            string chainIdKey = ter.chainID.FieldValue.Trim().ToUpperInvariant();
            if (chainIdWhiteList != null && !chainIdWhiteList.Contains(chainIdKey))
            {
                continue;
            }

            terCount++;
            if (terCount >= maximumChains)
            {
                // Enough chain terminators seen; later records are not read.
                break;
            }
        }
        else if (currentRecord.GetType() == typeof(ENDMDL_Record))
        {
            // Only the first model is read.
            break;
        }
    }

    // The file has been read; let the loader release its data as early as possible.
    proteinDataBankFile.UnloadFile();

    // One container per chain ID seen in EITHER dictionary. Previously the ATOM and HETATM
    // dictionaries were walked with independent positional indexes, so whenever their key sets
    // differed a chain's HETATM atoms were appended to a different chain's container.
    var allChainIds = new HashSet<string>(atomsByChain.Keys);
    allChainIds.UnionWith(hetAtomsByChain.Keys);

    var pdbFileChains = new ProteinChainListContainer();

    foreach (string chainId in allChainIds.OrderBy(id => id))
    {
        var chain = new ProteinAtomListContainer();
        pdbFileChains.ChainList.Add(chain);

        List<ATOM_Record> chainAtoms;
        bool hasAtoms = atomsByChain.TryGetValue(chainId, out chainAtoms) && chainAtoms.Count > 0;
        if (hasAtoms)
        {
            chain.AtomList = chainAtoms
                .OrderBy(atom => NullableTryParseInt32(atom.serial.FieldValue))
                .ToList();
        }

        List<HETATM_Record> chainHetAtoms;
        if (!hetAtomsByChain.TryGetValue(chainId, out chainHetAtoms) || chainHetAtoms.Count == 0)
        {
            continue;
        }

        // Residue numbers already covered by ATOM records of this chain, collected once per chain
        // instead of re-scanning the ATOM list for every HETATM record.
        var atomResidueNumbers = new HashSet<string>();
        if (hasAtoms)
        {
            foreach (ATOM_Record atom in chainAtoms)
            {
                atomResidueNumbers.Add(atom.resSeq.FieldValue);
            }
        }

        foreach (HETATM_Record hetatm in chainHetAtoms.OrderBy(h => NullableTryParseInt32(h.serial.FieldValue)))
        {
            if (atomResidueNumbers.Contains(hetatm.resSeq.FieldValue))
            {
                continue;
            }

            // The container's list may not have been assigned when the chain has no ATOM records.
            if (chain.AtomList == null)
            {
                chain.AtomList = new List<ATOM_Record>();
            }

            chain.AtomList.Add(ConvertHetatmRecordToAtomRecord(hetatm));
        }
    }

    int nonEmptyChainCount = pdbFileChains.ChainList.Count(c => c != null && c.AtomList != null && c.AtomList.Count > 0);

    if (nonEmptyChainCount >= minimumChains && nonEmptyChainCount <= maximumChains)
    {
        return pdbFileChains;
    }

    // Chain count outside the requested range.
    return null;
}
/// <summary>
/// Counts the distinct chain IDs found on the ATOM and HETATM records of a PDB file.
/// </summary>
/// <remarks>
/// ATOM, HETATM, MODEL and ENDMDL records are requested from the loader, and reading stops at the
/// first ENDMDL record. The whitelist is matched against the trimmed, upper-cased chain ID, while
/// distinctness is decided on the chain ID field value exactly as stored; blank chain IDs are
/// not counted.
/// </remarks>
/// <param name="pdbFilename">Path of the PDB file to inspect.</param>
/// <param name="chainIdWhiteList">Chain IDs to consider; null considers every chain.</param>
/// <param name="maximumToFind">
/// Optional limit. When greater than -1, scanning stops as soon as an ATOM or HETATM record makes
/// the number of distinct chain IDs exceed this value. -1 (the default) means no limit.
/// </param>
/// <returns>The number of distinct chain IDs seen before the scan ended.</returns>
/// <exception cref="FileNotFoundException">The file named by <paramref name="pdbFilename"/> does not exist.</exception>
public static int PdbAtomicChainsCount(string pdbFilename, string[] chainIdWhiteList = null, int maximumToFind = -1)
{
    if (!File.Exists(pdbFilename))
    {
        throw new FileNotFoundException("File not found", pdbFilename);
    }

    var proteinDataBankFile = new ProteinDataBankFile(pdbFilename, new[]
    {
        ATOM_Record.ATOM_Field.FieldName,
        HETATM_Record.HETATM_Field.FieldName,
        MODEL_Record.MODEL_Field.FieldName,
        ENDMDL_Record.ENDMDL_Field.FieldName
    });

    // A set gives the "already seen?" answer directly from Add and is unique by construction,
    // so neither a linear Contains scan nor a final Distinct() pass is needed.
    var chainNames = new HashSet<string>();

    for (int recordIndex = 0; recordIndex < proteinDataBankFile.Count; recordIndex++)
    {
        ProteinDataBankFileRecord currentRecord = proteinDataBankFile.NextRecord();
        if (currentRecord == null)
        {
            continue;
        }

        Type recordType = currentRecord.GetType();

        if (recordType == typeof(ENDMDL_Record))
        {
            // Only the first model is inspected.
            break;
        }

        string chainId;
        bool appliesLimit;

        if (recordType == typeof(ATOM_Record))
        {
            chainId = ((ATOM_Record)currentRecord).chainID.FieldValue;
            appliesLimit = true;
        }
        else if (recordType == typeof(HETATM_Record))
        {
            chainId = ((HETATM_Record)currentRecord).chainID.FieldValue;
            appliesLimit = true;
        }
        else if (recordType == typeof(TER_Record))
        {
            // NOTE(review): TER is not among the record types requested above, so this branch is
            // presumably never reached — confirm against the loader. Kept as in the original; a TER
            // record contributes its chain ID but never triggers the maximumToFind early exit.
            chainId = ((TER_Record)currentRecord).chainID.FieldValue;
            appliesLimit = false;
        }
        else
        {
            continue;
        }

        string chainIdKey = chainId.Trim().ToUpperInvariant();
        if (chainIdWhiteList != null && !chainIdWhiteList.Contains(chainIdKey))
        {
            continue;
        }

        if (string.IsNullOrWhiteSpace(chainId) || !chainNames.Add(chainId))
        {
            // Blank chain ID, or a chain that has been counted already.
            continue;
        }

        if (appliesLimit && maximumToFind > -1 && chainNames.Count > maximumToFind)
        {
            break;
        }
    }

    return chainNames.Count;
}
/// <summary>
/// Collects the residue names found on the ATOM and HETATM records of a PDB file, in file order.
/// </summary>
/// <param name="pdbFilename">Path of the PDB file to read.</param>
/// <param name="chainIdWhiteList">
/// Chain IDs to keep, compared against the trimmed, upper-cased chain ID of each record.
/// Null keeps every chain.
/// </param>
/// <param name="onlyCarbonAlphas">
/// When true, only records whose trimmed, upper-cased atom name equals
/// <c>StaticValues.CarbonAlpha</c> contribute a residue name.
/// </param>
/// <param name="distinct">When true, each residue name appears at most once in the result.</param>
/// <returns>The residue names, as stored in the records' residue-name field.</returns>
/// <exception cref="FileNotFoundException">The file named by <paramref name="pdbFilename"/> does not exist.</exception>
public static List<string> PdbAtomAcidList(string pdbFilename, string[] chainIdWhiteList = null, bool onlyCarbonAlphas = true, bool distinct = true)
{
    if (!File.Exists(pdbFilename))
    {
        throw new FileNotFoundException("File not found", pdbFilename);
    }

    // Only ATOM and HETATM records are requested from the loader.
    var pdbFile = new ProteinDataBankFile(pdbFilename, new[]
    {
        ATOM_Record.ATOM_Field.FieldName,
        HETATM_Record.HETATM_Field.FieldName
    });

    var residueNames = new List<string>();

    for (int step = 0; step < pdbFile.Count; step++)
    {
        ProteinDataBankFileRecord record = pdbFile.NextRecord();
        if (record == null)
        {
            continue;
        }

        if (record.GetType() == typeof(ATOM_Record))
        {
            var atomRecord = (ATOM_Record)record;

            string chainKey = atomRecord.chainID.FieldValue.Trim().ToUpperInvariant();
            bool chainRejected = chainIdWhiteList != null && !chainIdWhiteList.Contains(chainKey);
            if (chainRejected)
            {
                continue;
            }

            if (onlyCarbonAlphas && atomRecord.name.FieldValue.Trim().ToUpperInvariant() != StaticValues.CarbonAlpha)
            {
                continue;
            }

            string residueName = atomRecord.resName.FieldValue;
            if (distinct && residueNames.Contains(residueName))
            {
                continue;
            }

            residueNames.Add(residueName);
        }
        else if (record.GetType() == typeof(HETATM_Record))
        {
            var hetAtomRecord = (HETATM_Record)record;

            string chainKey = hetAtomRecord.chainID.FieldValue.Trim().ToUpperInvariant();
            bool chainRejected = chainIdWhiteList != null && !chainIdWhiteList.Contains(chainKey);
            if (chainRejected)
            {
                continue;
            }

            if (onlyCarbonAlphas && hetAtomRecord.name.FieldValue.Trim().ToUpperInvariant() != StaticValues.CarbonAlpha)
            {
                continue;
            }

            string residueName = hetAtomRecord.resName.FieldValue;
            if (distinct && residueNames.Contains(residueName))
            {
                continue;
            }

            residueNames.Add(residueName);
        }
    }

    return residueNames;
}