/// <summary>
/// Parses a simple coordinate file into a primary structure. The expected layout is:
/// line 1 = atom count, line 2 = title, then one line per atom of the form
/// "name x y z" (whitespace separated).
/// </summary>
/// <param name="filename">Path of the file to read.</param>
/// <returns>A structure holding the title and one atom per atom line, indexed from 0.</returns>
/// <exception cref="FileParseException">
/// Thrown when the file cannot be opened or read, ends early, or contains a line that cannot be parsed.
/// </exception>
public static PrimaryStructure GetStructure(string filename) {

    PrimaryStructure model;

    try {
        // 'using' guarantees the reader is closed on every exit path.
        using (StreamReader sr = new StreamReader(filename)) {

            // Structure files are machine-written with '.' as the decimal separator, so numbers
            // must be parsed culture-independently or they are misread on comma-decimal locales.
            System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

            string countLine = sr.ReadLine();
            if (countLine == null) {
                throw new FileParseException("File is empty: expected an atom count on the first line");
            }

            int atomCount = Int32.Parse(countLine.Trim(), invariant);
            string titleLine = sr.ReadLine();

            model = new PrimaryStructure();
            model.Title = titleLine;
            model.Time = 0;

            // Built once: the pattern does not change between atom lines.
            Regex atomPattern = new Regex(@"\s*?(\w+)\s+([\d\.\-]+)\s+([\d\.\-]+)\s+([\d\.\-]+).*$");

            for (int i = 0; i < atomCount; i++) {

                string atomLine = sr.ReadLine();
                if (atomLine == null) {
                    throw new FileParseException("Unexpected end of file: expected " + atomCount + " atom lines but found " + i);
                }

                Match m = atomPattern.Match(atomLine);
                if (!m.Success) {
                    throw new FileParseException("Unable to parse atom line " + (i + 1) + ": [" + atomLine + "]");
                }

                // This format carries no atom serial number, so the zero-based line index doubles as the ID.
                int atomID = i;
                string name = m.Groups[1].Value.Trim();
                Element element = ElementHelper.Parse(name);

                // Coordinates are scaled by 1/10 (presumably angstroms to nanometres - TODO confirm).
                Vector3 position = new Vector3();
                position.x = float.Parse(m.Groups[2].Value, invariant) / 10f;
                position.y = float.Parse(m.Groups[3].Value, invariant) / 10f;
                position.z = float.Parse(m.Groups[4].Value, invariant) / 10f;

                Atom atom = new Atom(i, atomID, name, element, position);
                atom.ResidueType = StandardResidue.None;

                model.AddAtom(i, atom);
            }
        }
    }
    catch (FileParseException) {
        // Already carries a descriptive message; do not re-wrap.
        throw;
    }
    catch (Exception e) {
        throw new FileParseException(e.Message);
    }

    return model;
}
/// <summary>
/// Parses a PDB formatted file for a molecular primary structure.
/// Only the first model in the file (as delimited by the ENDMDL record) is parsed;
/// additional models in the PDB file are discarded.
/// </summary>
/// <param name="filename">Path of the PDB file to read.</param>
/// <returns>
/// The structure built from the TITLE, ATOM, HETATM and TER records of the first model.
/// Atom, residue and chain indexes are 1-based.
/// </returns>
/// <exception cref="FileParseException">
/// Thrown when the file cannot be opened or read, or a record cannot be parsed.
/// </exception>
public static PrimaryStructure GetPrimaryStructure(string filename) {

    PrimaryStructure model;

    try {
        // 'using' guarantees the reader is closed on every exit path.
        using (StreamReader sr = new StreamReader(filename)) {

            // PDB numbers always use '.' as the decimal separator, so they must be parsed
            // culture-independently or they are misread on comma-decimal locales.
            System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

            model = new PrimaryStructure();
            bool endOfModel = false;

            // all indexing is 1 based and will increment before first assignment
            int atomIndex = 0;
            int residueIndex = 0;
            int chainIndex = 0;

            Chain chain = null;
            Residue residue = null;

            int currentResidueID = -1;
            string currentChainID = "";
            bool TERFound = false;

            while (!endOfModel) {

                string record = sr.ReadLine();

                if (record == null || record.Trim().Equals("END")) {
                    endOfModel = true;
                    continue;
                }

                if (record.Trim() == "") {
                    continue;
                }

                // The record name occupies the first six columns; shorter lines are used whole.
                string recordName = record.Length < 6 ? record.Trim() : record.Substring(0, 6).Trim();

                switch (recordName) {

                    case "ATOM":
                    case "HETATM": {

                        int atomResidueID = int.Parse(record.Substring(22, 4).Trim(), invariant);

                        // The explicit null test matters: PDB residue numbers may be negative, so a
                        // first residue numbered -1 would otherwise match the initial sentinel value
                        // and leave 'residue' null for the code below.
                        if (residue == null || atomResidueID != currentResidueID) {

                            // store the completed residue - nothing to store the first time through
                            if (residue != null) {
                                model.AddResidue(residueIndex, residue);
                                if (chain != null && currentChainID != "") {
                                    chain.AddResidue(residueIndex, residue);
                                }
                            }

                            residueIndex++;
                            currentResidueID = atomResidueID;
                            string residueName = record.Substring(17, 3).Trim();
                            residue = new Residue(residueIndex, atomResidueID, residueName);
                        }

                        string chainID = record.Substring(21, 1).Trim();

                        // A TER record forces a new chain even if the chain identifier repeats.
                        if (currentChainID != chainID || TERFound) {

                            Console.WriteLine("Handling new chain");

                            // Chains with a blank identifier are tracked but never stored in the model.
                            if (chain != null && currentChainID != "") {
                                model.AddChain(chain);
                            }

                            chainIndex++;
                            chain = new Chain(chainIndex, chainID);

                            currentChainID = chainID;
                            TERFound = false;
                        }

                        int atomID = int.Parse(record.Substring(6, 5).Trim(), invariant);
                        string atomName = record.Substring(12, 4).Trim();

                        // The element symbol sits in columns 77-78. Lines that end before those
                        // columns are treated exactly like lines where they are blank, instead of
                        // failing the whole parse with an out-of-range substring.
                        string elementSymbol = "";
                        if (record.Length > 76) {
                            elementSymbol = record.Substring(76, Math.Min(2, record.Length - 76)).Trim();
                        }
                        Element element = ElementHelper.Parse(elementSymbol);

                        // if the file has extra columns at the end then assume they are for charge
                        // and try to parse; anything unparseable leaves the charge at 0 (best effort)
                        float charge = 0;
                        if (record.Length > 80) {
                            string chargeString = record.Substring(80).Trim();
                            if (chargeString.Length > 0) {
                                System.Globalization.NumberStyles chargeStyle =
                                    System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands;
                                if (!float.TryParse(chargeString, chargeStyle, invariant, out charge)) {
                                    charge = 0;
                                }
                            }
                        }

                        // PDB coordinates are in angstroms. Convert to nanometres.
                        Vector3 position = new Vector3();
                        position.x = float.Parse(record.Substring(30, 8), invariant) / 10;
                        position.y = float.Parse(record.Substring(38, 8), invariant) / 10;
                        position.z = float.Parse(record.Substring(46, 8), invariant) / 10;

                        atomIndex++;
                        Atom atom = new Atom(atomIndex, atomID, atomName, element, position, charge);

                        // check for and store main chain elements (ATOM records of typed residues only)
                        if (recordName.Equals("ATOM") && residue.ResidueType != StandardResidue.None) {
                            switch (atom.Name) {
                                case "N":
                                    residue.AmineNitrogen = atom;
                                    break;
                                case "CA":
                                    residue.AlphaCarbon = atom;
                                    break;
                                case "C":
                                    residue.CarbonylCarbon = atom;
                                    break;
                                case "O":
                                case "O1":
                                case "OT1":
                                    residue.CarbonylOxygen = atom;
                                    break;
                            }
                        }

                        atom.ResidueIndex = residueIndex;
                        atom.ResidueID = currentResidueID;
                        atom.ResidueName = residue.Name;
                        atom.ResidueType = residue.ResidueType;

                        if (currentChainID != "") {
                            atom.ChainID = currentChainID;
                        }

                        residue.Atoms.Add(atomIndex, atom);
                        model.AddAtom(atomIndex, atom);
                        break;
                    }

                    case "TER":
                        Console.WriteLine("Found TER");
                        TERFound = true;
                        break;

                    case "TITLE":
                        if (model.Title == null) {
                            model.Title = "";
                        }
                        // many PDBs have long title sections. Abbreviate for now.
                        if (model.Title.Length < 200) {
                            // A bare "TITLE" record has no text after the record name; treat it as
                            // an empty line rather than failing on an out-of-range substring.
                            string titleText = record.Length > 7 ? record.Substring(7) : "";
                            model.Title += titleText + "\n";
                        }
                        break;

                    case "ENDMDL":
                    case "END":
                        // currently only parse the first model in the file
                        endOfModel = true;
                        break;
                }
            }

            // Flush the residue and chain that were still open when reading stopped.
            if (residue != null) {
                model.AddResidue(residueIndex, residue);
                if (chain != null) {
                    chain.AddResidue(residueIndex, residue);
                }
            }

            if (chain != null && currentChainID != "") {
                model.AddChain(chain);
            }
        }
    }
    catch (FileParseException) {
        // Already carries a descriptive message; do not re-wrap.
        throw;
    }
    catch (Exception e) {
        throw new FileParseException(e.Message);
    }

    return model;
}
/// <summary>
/// Reads a single frame from the supplied reader: a title line (optionally containing
/// " t= &lt;time&gt;"), an atom count line, that many fixed-width atom lines, and a box line.
/// The column layout appears to match the GROMACS .gro format - TODO confirm.
/// </summary>
/// <param name="sr">Reader positioned at the start of a frame. It is not closed by this method.</param>
/// <returns>The parsed frame, or null when only blank lines remain before the end of the stream.</returns>
/// <exception cref="FileParseException">Thrown when any part of the frame cannot be parsed.</exception>
private static PrimaryStructure GetFrame(StreamReader sr) {

    PrimaryStructure model;

    // todo: improve file format error handling for misformatted files
    try {
        // Numbers in the file use '.' as the decimal separator, so they must be parsed
        // culture-independently or they are misread on comma-decimal locales.
        System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

        model = new PrimaryStructure();

        // skip any blank lines before the title line
        string titleLine = sr.ReadLine();
        while (titleLine != null && titleLine.Trim().Equals("")) {
            titleLine = sr.ReadLine();
        }

        if (titleLine == null) {
            return null;
        }

        model.Title = titleLine;
        model.Time = 0;

        // No frame time in the header means this is taken to be a structure file: time stays at 0.
        Regex timePattern = new Regex(@"\st=\s*(\d+\.?\d*)");
        Match timeMatch = timePattern.Match(titleLine);
        if (timeMatch.Success) {
            model.Time = float.Parse(timeMatch.Groups[1].Value, invariant);
        }

        int residueIndex = 0;
        int chainIndex = 0;

        Chain chain = null;
        Residue residue = null;
        Residue lastResidue = null;
        int currentResidueID = -1;

        string countLine = sr.ReadLine();
        if (countLine == null) {
            throw new FileParseException("Unexpected end of file: expected an atom count after the title line");
        }
        int atomCount = Int32.Parse(countLine.Trim(), invariant);

        for (int i = 0; i < atomCount; i++) {

            string atomLine = sr.ReadLine();
            if (atomLine == null) {
                // something is wrong with the file, i.e. it ends before the stated atom count
                break;
            }

            // A change of residue number closes the current residue and opens a new one. The explicit
            // null test stops a first residue numbered -1 from matching the initial sentinel value.
            int atomResidueID = int.Parse(atomLine.Substring(0, 5).Trim(), invariant);
            if (residue == null || atomResidueID != currentResidueID) {

                // store the completed residue - nothing to store the first time through
                if (residue != null) {
                    model.AddResidue(residueIndex, residue);
                    if (chain != null) {
                        chain.AddResidue(residueIndex, residue);
                    }
                }

                residueIndex++;
                currentResidueID = atomResidueID;
                string residueName = atomLine.Substring(5, 5).Trim();

                lastResidue = residue;
                residue = new Residue(residueIndex, currentResidueID, residueName);

                // Residue type changes are used to infer chain boundaries. May not be 100% accurate,
                // but topology files are not parsed so options are limited.
                bool residueTypeChanged = lastResidue == null || residue.ResidueType != lastResidue.ResidueType;

                // Two trailing oxygens at the end of an amino acid are taken as a chain terminator.
                // The i >= 2 test keeps the look-back inside the atoms read so far when the previous
                // residue contributed only a single atom.
                bool endsWithTwoOxygens = !residueTypeChanged
                    && lastResidue.ResidueType == StandardResidue.AminoAcid
                    && i >= 2
                    && model.Atoms()[i - 1].Element == Element.O
                    && model.Atoms()[i - 2].Element == Element.O;

                if (residueTypeChanged || endsWithTwoOxygens) {
                    if (chain != null) {
                        model.AddChain(chain);
                    }
                    chainIndex++;
                    chain = new Chain(chainIndex, chainIndex.ToString());
                }
            }

            // Atom indexes in this parser are zero-based (the loop counter).
            int atomIndex = i;
            int atomID = int.Parse(atomLine.Substring(15, 5).Trim(), invariant);
            string atomName = atomLine.Substring(10, 5).Trim();
            Element element = ElementHelper.Parse(atomName);

            Vector3 position = new Vector3();
            position.x = float.Parse(atomLine.Substring(20, 8), invariant);
            position.y = float.Parse(atomLine.Substring(28, 8), invariant);
            position.z = float.Parse(atomLine.Substring(36, 8), invariant);

            Atom atom = new Atom(atomIndex, atomID, atomName, element, position);

            // check for and store main chain elements
            if (residue.ResidueType != StandardResidue.None) {
                switch (atom.Name) {
                    case "N":
                        residue.AmineNitrogen = atom;
                        break;
                    case "CA":
                        residue.AlphaCarbon = atom;
                        break;
                    case "C":
                        residue.CarbonylCarbon = atom;
                        break;
                    case "O":
                    case "O1":
                    case "OT1":
                        residue.CarbonylOxygen = atom;
                        break;
                }
            }

            atom.ResidueIndex = residueIndex;
            atom.ResidueID = currentResidueID;
            atom.ResidueName = residue.Name;
            atom.ResidueType = residue.ResidueType;
            atom.ChainID = chainIndex.ToString();

            residue.Atoms.Add(atomIndex, atom);
            model.AddAtom(atomIndex, atom);
        }

        // Flush the residue and chain that were still open when the atom lines ended.
        if (residue != null) {
            model.AddResidue(residueIndex, residue);
            if (chain != null) {
                chain.AddResidue(residueIndex, residue);
            }
        }

        if (chain != null) {
            model.AddChain(chain);
        }

        // Parse box vectors.
        // todo: test and make this more robust for various string lengths
        // NOTE(review): the nine elements read below assume parseBoxLine always returns at least
        // nine values, and that it copes with a null line on a truncated file - confirm.
        float[] vertices = parseBoxLine(sr.ReadLine());
        model.OriginalBoundingBox = new BoundingBox(
            vertices[0], vertices[1], vertices[2],
            vertices[3], vertices[4], vertices[5],
            vertices[6], vertices[7], vertices[8]);
    }
    catch (FileParseException) {
        // Already carries a descriptive message; do not re-wrap.
        throw;
    }
    catch (Exception e) {
        throw new FileParseException(e.Message);
    }

    return model;
}