/// <summary>
/// Counts the MODEL records found in a pdb file.
/// </summary>
/// <param name="pdbFilename">Path of the pdb file to read.</param>
/// <param name="maximumToFind">
/// When greater than -1, reading stops as soon as the running count exceeds this value, so the
/// result is never more than <c>maximumToFind + 1</c>. The default of -1 counts every MODEL record.
/// </param>
/// <returns>The number of MODEL records counted before reading stopped.</returns>
/// <exception cref="FileNotFoundException">Thrown when <paramref name="pdbFilename"/> does not exist.</exception>
public static int PdbModelCount(string pdbFilename, int maximumToFind = -1)
{
    if (!File.Exists(pdbFilename))
    {
        throw new FileNotFoundException("File not found", pdbFilename);
    }

    // Ask the loader for MODEL records only; nothing else is needed to count models.
    var proteinDataBankFile = new ProteinDataBankFile(pdbFilename, new[]
    {
        MODEL_Record.MODEL_Field.FieldName
    });

    int modelCount = 0;

    for (int recordIndex = 0; recordIndex < proteinDataBankFile.Count; recordIndex++)
    {
        ProteinDataBankFileRecord currentRecord = proteinDataBankFile.NextRecord();

        if (currentRecord == null || currentRecord.GetType() != typeof(MODEL_Record))
        {
            continue;
        }

        modelCount++;

        // Stop early once the caller's limit has been exceeded; the count returned is then limit + 1.
        if (maximumToFind > -1 && modelCount > maximumToFind)
        {
            break;
        }
    }

    return modelCount;
}
/// <summary>
/// Loads one pdb file and groups its ATOM records into per-chain atom lists. HETATM records are
/// converted to ATOM records and appended to their chain when that chain has no ATOM record with
/// the same residue sequence number.
/// </summary>
/// <remarks>
/// Reading stops at the first ENDMDL record, or once <paramref name="maximumChains"/> TER records
/// have been seen for accepted chains. Chain ids are trimmed and upper-cased before grouping, and
/// only records whose residue name passes <c>ParameterValidation.IsAminoAcidCodeValid</c> are kept.
/// The returned chains are ordered by chain id; every chain id seen in an ATOM or HETATM record
/// gets its own slot, even when no record of that chain was kept.
/// </remarks>
/// <param name="pdbFilename">Path of the pdb file to read.</param>
/// <param name="chainIdWhiteList">Chain ids to accept, or null to accept every chain. Ids are compared against the trimmed, upper-cased chain id of each record.</param>
/// <param name="minimumChains">Smallest number of non-empty chains for which a result is returned.</param>
/// <param name="maximumChains">Largest number of non-empty chains for which a result is returned.</param>
/// <param name="onlyCarbonAlphas">When true, only records whose atom name equals <c>StaticValues.CarbonAlpha</c> are kept.</param>
/// <returns>The per-chain atom lists, or null when the number of non-empty chains is outside the requested range.</returns>
/// <exception cref="FileNotFoundException">Thrown when <paramref name="pdbFilename"/> does not exist.</exception>
/// <exception cref="ArgumentOutOfRangeException">Thrown when <paramref name="minimumChains"/> is greater than <paramref name="maximumChains"/>.</exception>
public static ProteinChainListContainer PdbAtomicChains(string pdbFilename, string[] chainIdWhiteList, int minimumChains = 2, int maximumChains = 2, bool onlyCarbonAlphas = false)
{
    if (!File.Exists(pdbFilename))
    {
        throw new FileNotFoundException("File not found", pdbFilename);
    }

    if (minimumChains > maximumChains)
    {
        throw new ArgumentOutOfRangeException(nameof(minimumChains));
    }

    // Request only the record kinds this method inspects.
    var proteinDataBankFile = new ProteinDataBankFile(pdbFilename, new[]
    {
        ATOM_Record.ATOM_Field.FieldName,
        HETATM_Record.HETATM_Field.FieldName,
        TER_Record.TER_Field.FieldName,
        MODEL_Record.MODEL_Field.FieldName,
        ENDMDL_Record.ENDMDL_Field.FieldName
    });

    // Records grouped by normalized chain id. A key is registered as soon as an accepted record of
    // that chain is seen, even if the record itself is then rejected by the residue-name check.
    var atomsByChain = new Dictionary<string, List<ATOM_Record>>();
    var hetAtomsByChain = new Dictionary<string, List<HETATM_Record>>();
    int terCount = 0;

    for (int recordIndex = 0; recordIndex < proteinDataBankFile.Count; recordIndex++)
    {
        ProteinDataBankFileRecord currentRecord = proteinDataBankFile.NextRecord();

        if (currentRecord == null)
        {
            continue;
        }

        var recordType = currentRecord.GetType();

        if (recordType == typeof(ATOM_Record))
        {
            var atom = (ATOM_Record)currentRecord;

            if (onlyCarbonAlphas && atom.name.FieldValue.Trim().ToUpperInvariant() != StaticValues.CarbonAlpha)
            {
                continue;
            }

            string chainIdKey = atom.chainID.FieldValue.Trim().ToUpperInvariant();

            if (chainIdWhiteList != null && !chainIdWhiteList.Contains(chainIdKey))
            {
                continue;
            }

            List<ATOM_Record> chainAtomRecords;
            if (!atomsByChain.TryGetValue(chainIdKey, out chainAtomRecords))
            {
                chainAtomRecords = new List<ATOM_Record>();
                atomsByChain.Add(chainIdKey, chainAtomRecords);
            }

            if (ParameterValidation.IsAminoAcidCodeValid(atom.resName.FieldValue))
            {
                chainAtomRecords.Add(atom);
            }
        }
        else if (recordType == typeof(HETATM_Record))
        {
            var hetatm = (HETATM_Record)currentRecord;

            if (onlyCarbonAlphas && hetatm.name.FieldValue.Trim().ToUpperInvariant() != StaticValues.CarbonAlpha)
            {
                continue;
            }

            string chainIdKey = hetatm.chainID.FieldValue.Trim().ToUpperInvariant();

            if (chainIdWhiteList != null && !chainIdWhiteList.Contains(chainIdKey))
            {
                continue;
            }

            List<HETATM_Record> chainHetAtomRecords;
            if (!hetAtomsByChain.TryGetValue(chainIdKey, out chainHetAtomRecords))
            {
                chainHetAtomRecords = new List<HETATM_Record>();
                hetAtomsByChain.Add(chainIdKey, chainHetAtomRecords);
            }

            if (ParameterValidation.IsAminoAcidCodeValid(hetatm.resName.FieldValue))
            {
                chainHetAtomRecords.Add(hetatm);
            }
        }
        else if (recordType == typeof(TER_Record))
        {
            var ter = (TER_Record)currentRecord;
            string chainIdKey = ter.chainID.FieldValue.Trim().ToUpperInvariant();

            if (chainIdWhiteList != null && !chainIdWhiteList.Contains(chainIdKey))
            {
                continue;
            }

            // Enough chain terminators have been seen; later records are not needed.
            terCount++;
            if (terCount >= maximumChains)
            {
                break;
            }
        }
        else if (recordType == typeof(ENDMDL_Record))
        {
            // Only the records up to the first ENDMDL are used.
            break;
        }
    }

    // The file has been parsed, so release its data as early as possible.
    proteinDataBankFile.UnloadFile();

    // One output slot per distinct chain id, in sorted order. Driving both the ATOM and the HETATM
    // pass from this single key list keeps every chain's records in the same slot even when a
    // chain appears in only one of the two dictionaries, and avoids depending on Dictionary
    // enumeration order, which is not guaranteed.
    List<string> sortedChainIds = atomsByChain.Keys.Union(hetAtomsByChain.Keys).OrderBy(id => id).ToList();

    var pdbFileChains = new ProteinChainListContainer();

    foreach (string chainId in sortedChainIds)
    {
        var chainContainer = new ProteinAtomListContainer();
        pdbFileChains.ChainList.Add(chainContainer);

        List<ATOM_Record> chainAtoms;
        if (atomsByChain.TryGetValue(chainId, out chainAtoms) && chainAtoms.Count > 0)
        {
            chainContainer.AtomList = chainAtoms.OrderBy(a => NullableTryParseInt32(a.serial.FieldValue)).ToList();
        }

        List<HETATM_Record> chainHetAtoms;
        if (!hetAtomsByChain.TryGetValue(chainId, out chainHetAtoms) || chainHetAtoms.Count == 0)
        {
            continue;
        }

        // Residue numbers already covered by ATOM records of this chain; built once per chain so
        // each HETATM check is a set lookup instead of a scan over every atom.
        var atomResidueNumbers = new HashSet<string>();
        if (chainAtoms != null)
        {
            foreach (ATOM_Record chainAtom in chainAtoms)
            {
                atomResidueNumbers.Add(chainAtom.resSeq.FieldValue);
            }
        }

        foreach (HETATM_Record hetAtom in chainHetAtoms.OrderBy(h => NullableTryParseInt32(h.serial.FieldValue)))
        {
            if (atomResidueNumbers.Contains(hetAtom.resSeq.FieldValue))
            {
                continue;
            }

            // Defensive: the list may not have been assigned when the chain has no ATOM records.
            if (chainContainer.AtomList == null)
            {
                chainContainer.AtomList = new List<ATOM_Record>();
            }

            chainContainer.AtomList.Add(ConvertHetatmRecordToAtomRecord(hetAtom));
        }
    }

    int nonEmptyChainCount = pdbFileChains.ChainList.Count(a => a != null && a.AtomList != null && a.AtomList.Count > 0);

    if (nonEmptyChainCount >= minimumChains && nonEmptyChainCount <= maximumChains)
    {
        return pdbFileChains;
    }

    // The number of non-empty chains is outside the requested range.
    return null;
}
/// <summary>
/// Counts the distinct chain ids that appear in the records of a pdb file, reading up to the
/// first ENDMDL record.
/// </summary>
/// <param name="pdbFilename">Path of the pdb file to read.</param>
/// <param name="chainIdWhiteList">Chain ids to accept, or null to accept every chain. Ids are compared against the trimmed, upper-cased chain id of each record.</param>
/// <param name="maximumToFind">
/// When greater than -1, reading stops as soon as an ATOM or HETATM record pushes the number of
/// distinct chain ids above this value. The default of -1 reads without a limit.
/// </param>
/// <returns>The number of distinct, non-blank chain ids collected before reading stopped.</returns>
/// <exception cref="FileNotFoundException">Thrown when <paramref name="pdbFilename"/> does not exist.</exception>
public static int PdbAtomicChainsCount(string pdbFilename, string[] chainIdWhiteList = null, int maximumToFind = -1)
{
    if (!File.Exists(pdbFilename))
    {
        throw new FileNotFoundException("File not found", pdbFilename);
    }

    // ATOM, HETATM, MODEL and ENDMDL records are requested. TER is not in this list, so the TER
    // branch below presumably only runs if the loader returns record kinds beyond those requested.
    var proteinDataBankFile = new ProteinDataBankFile(pdbFilename, new[]
    {
        ATOM_Record.ATOM_Field.FieldName,
        HETATM_Record.HETATM_Field.FieldName,
        MODEL_Record.MODEL_Field.FieldName,
        ENDMDL_Record.ENDMDL_Field.FieldName
    });

    // Chain ids are stored exactly as they appear in the record (not trimmed or upper-cased).
    var chainNames = new HashSet<string>();

    for (int recordIndex = 0; recordIndex < proteinDataBankFile.Count; recordIndex++)
    {
        ProteinDataBankFileRecord currentRecord = proteinDataBankFile.NextRecord();

        if (currentRecord == null)
        {
            continue;
        }

        var recordType = currentRecord.GetType();

        if (recordType == typeof(ATOM_Record))
        {
            var atom = (ATOM_Record)currentRecord;
            string chainIdKey = atom.chainID.FieldValue.Trim().ToUpperInvariant();

            if (chainIdWhiteList != null && !chainIdWhiteList.Contains(chainIdKey))
            {
                continue;
            }

            // HashSet.Add returns true only for an id not seen before.
            if (!string.IsNullOrWhiteSpace(atom.chainID.FieldValue) && chainNames.Add(atom.chainID.FieldValue))
            {
                if (maximumToFind > -1 && chainNames.Count > maximumToFind)
                {
                    break;
                }
            }
        }
        else if (recordType == typeof(HETATM_Record))
        {
            var hetatm = (HETATM_Record)currentRecord;
            string chainIdKey = hetatm.chainID.FieldValue.Trim().ToUpperInvariant();

            if (chainIdWhiteList != null && !chainIdWhiteList.Contains(chainIdKey))
            {
                continue;
            }

            if (!string.IsNullOrWhiteSpace(hetatm.chainID.FieldValue) && chainNames.Add(hetatm.chainID.FieldValue))
            {
                if (maximumToFind > -1 && chainNames.Count > maximumToFind)
                {
                    break;
                }
            }
        }
        else if (recordType == typeof(ENDMDL_Record))
        {
            // Only the records up to the first ENDMDL are counted.
            break;
        }
        else if (recordType == typeof(TER_Record))
        {
            var ter = (TER_Record)currentRecord;
            string chainIdKey = ter.chainID.FieldValue.Trim().ToUpperInvariant();

            if (chainIdWhiteList != null && !chainIdWhiteList.Contains(chainIdKey))
            {
                continue;
            }

            // A TER record can contribute a chain id but never triggers the early stop.
            if (!string.IsNullOrWhiteSpace(ter.chainID.FieldValue))
            {
                chainNames.Add(ter.chainID.FieldValue);
            }
        }
    }

    return chainNames.Count;
}
/// <summary>
/// Collects the residue names carried by the ATOM and HETATM records of a pdb file, in the order
/// the records are read.
/// </summary>
/// <param name="pdbFilename">Path of the pdb file to read.</param>
/// <param name="chainIdWhiteList">Chain ids to accept, or null to accept every chain. Ids are compared against the trimmed, upper-cased chain id of each record.</param>
/// <param name="onlyCarbonAlphas">When true, only records whose atom name equals <c>StaticValues.CarbonAlpha</c> contribute.</param>
/// <param name="distinct">When true, each residue name is listed once, at its first occurrence; when false, one entry is added per accepted record.</param>
/// <returns>The collected residue names.</returns>
/// <exception cref="FileNotFoundException">Thrown when <paramref name="pdbFilename"/> does not exist.</exception>
public static List<string> PdbAtomAcidList(string pdbFilename, string[] chainIdWhiteList = null, bool onlyCarbonAlphas = true, bool distinct = true)
{
    bool fileMissing = !File.Exists(pdbFilename);
    if (fileMissing)
    {
        throw new FileNotFoundException("File not found", pdbFilename);
    }

    // Only ATOM and HETATM records are requested from the loader.
    string[] wantedRecordNames =
    {
        ATOM_Record.ATOM_Field.FieldName,
        HETATM_Record.HETATM_Field.FieldName
    };
    var pdbFile = new ProteinDataBankFile(pdbFilename, wantedRecordNames);

    var residueNames = new List<string>();

    for (int i = 0; i < pdbFile.Count; i++)
    {
        ProteinDataBankFileRecord record = pdbFile.NextRecord();
        if (record == null)
        {
            continue;
        }

        var recordType = record.GetType();

        if (recordType == typeof(ATOM_Record))
        {
            var atomRecord = (ATOM_Record)record;

            string chainId = atomRecord.chainID.FieldValue.Trim().ToUpperInvariant();
            bool chainAccepted = chainIdWhiteList == null || chainIdWhiteList.Contains(chainId);
            if (!chainAccepted)
            {
                continue;
            }

            if (onlyCarbonAlphas && atomRecord.name.FieldValue.Trim().ToUpperInvariant() != StaticValues.CarbonAlpha)
            {
                continue;
            }

            string residueName = atomRecord.resName.FieldValue;
            if (distinct && residueNames.Contains(residueName))
            {
                continue;
            }

            residueNames.Add(residueName);
        }
        else if (recordType == typeof(HETATM_Record))
        {
            var hetAtomRecord = (HETATM_Record)record;

            string chainId = hetAtomRecord.chainID.FieldValue.Trim().ToUpperInvariant();
            bool chainAccepted = chainIdWhiteList == null || chainIdWhiteList.Contains(chainId);
            if (!chainAccepted)
            {
                continue;
            }

            if (onlyCarbonAlphas && hetAtomRecord.name.FieldValue.Trim().ToUpperInvariant() != StaticValues.CarbonAlpha)
            {
                continue;
            }

            string residueName = hetAtomRecord.resName.FieldValue;
            if (distinct && residueNames.Contains(residueName))
            {
                continue;
            }

            residueNames.Add(residueName);
        }
    }

    return residueNames;
}