Esempio n. 1
0
        /// <summary>
        /// Reads PDB records line by line from the reader's input (<c>oInput</c>) and
        /// stores the result in <paramref name="oFile"/>. Once a SEQRES record has been
        /// seen the input is treated as a protein structure: ATOM records are added to a
        /// <see cref="PDBPolymer"/>, grouped into strands and monomers, where a monomer
        /// name is the concatenation of the residue name, the running chain letter and
        /// the residue sequence number. Each TER record advances the running chain
        /// letter. Without a SEQRES record all atoms are collected in a plain atom
        /// container instead.
        /// </summary>
        /// <remarks>
        /// <para>
        /// CONECT records are only turned into bonds when the <c>readConnect</c> setting
        /// is set. When the <c>useRebondTool</c> setting is set, bonds are additionally
        /// perceived with the rebond tool once an END record is reached.
        /// </para>
        /// <para>
        /// An <see cref="IOException"/> or <see cref="ArgumentException"/> raised while
        /// reading is logged and ends the reading; whatever was parsed up to that point
        /// is still added to <paramref name="oFile"/>. Any other exception propagates to
        /// the caller. The input is closed in either case.
        /// </para>
        /// </remarks>
        /// <param name="oFile">The chem file to fill; its builder is used to create the containers.</param>
        /// <returns>The same <paramref name="oFile"/> instance, with one chem sequence added.</returns>
        private IChemFile ReadChemFile(IChemFile oFile)
        {
            // initialize all containers
            var oSeq = oFile.Builder.NewChemSequence();
            var oModel = oFile.Builder.NewChemModel();
            var oSet = oFile.Builder.NewAtomContainerSet();

            // working state for the model currently being read
            var oBP = new PDBPolymer();
            var molecularStructure = oFile.Builder.NewAtomContainer();
            string cRead = "";
            char chain = 'A'; // To ensure stringent name giving of monomers
            bool isProteinStructure = false;

            atomNumberMap = new Dictionary<int, IAtom>();
            if (readConnect.IsSet)
            {
                bondsFromConnectRecords = new List<IBond>();
            }

            // do the reading of the Input
            try
            {
                while (true)
                {
                    cRead = oInput.ReadLine();
                    Debug.WriteLine($"Read line: {cRead}");
                    if (cRead == null)
                    {
                        break; // end of input
                    }

                    // Remember the real length before padding; ReadAtom and the REMARK
                    // handling work with the unpadded length.
                    int lineLength = cRead.Length;

                    // make sure the record name is 6 characters long
                    if (lineLength < 6)
                    {
                        cRead = cRead + "      ";
                    }

                    // check the first column to decide what to do
                    var cCol = cRead.Substring(0, 6);
                    switch (cCol.ToUpperInvariant())
                    {
                        case "SEQRES":
                            isProteinStructure = true;
                            break;

                        case "ATOM  ":
                            {
                                // read an atom record
                                var oAtom = ReadAtom(cRead, lineLength);

                                if (isProteinStructure)
                                {
                                    var chainName = chain.ToString(NumberFormatInfo.InvariantInfo);

                                    // construct a string describing the residue
                                    var cResidue = new StringBuilder(8);
                                    var residueName = oAtom.ResName;
                                    if (residueName != null)
                                    {
                                        cResidue.Append(residueName.Trim());
                                    }
                                    if (oAtom.ChainID != null)
                                    {
                                        // The running chain letter is used on purpose, not the
                                        // chain id stored in the record.
                                        cResidue.Append(chain);
                                    }
                                    var residueSequence = oAtom.ResSeq;
                                    if (residueSequence != null)
                                    {
                                        cResidue.Append(residueSequence.Trim());
                                    }
                                    var monomerName = cResidue.ToString();

                                    // search for an existing strand or create a new one.
                                    var strandName = oAtom.ChainID;
                                    if (string.IsNullOrEmpty(strandName))
                                    {
                                        strandName = chainName;
                                    }
                                    var oStrand = oBP.GetStrand(strandName);
                                    if (oStrand == null)
                                    {
                                        oStrand = new PDBStrand
                                        {
                                            StrandName = strandName,
                                            Id = chainName
                                        };
                                    }

                                    // search for an existing monomer or create a new one.
                                    var oMonomer = oBP.GetMonomer(monomerName, chainName);
                                    if (oMonomer == null)
                                    {
                                        oMonomer = new PDBMonomer
                                        {
                                            MonomerName = monomerName,
                                            MonomerType = oAtom.ResName,
                                            ChainID = oAtom.ChainID,
                                            ICode = oAtom.ICode,
                                            ResSeq = oAtom.ResSeq
                                        };
                                    }

                                    // add the atom
                                    oBP.AddAtom(oAtom, oMonomer, oStrand);
                                }
                                else
                                {
                                    molecularStructure.Atoms.Add(oAtom);
                                }

                                // The serial-number lookup is only needed to resolve CONECT records.
                                if (readConnect.IsSet)
                                {
                                    var isDup = atomNumberMap.ContainsKey(oAtom.Serial.Value);
                                    atomNumberMap[oAtom.Serial.Value] = oAtom;
                                    if (isDup)
                                    {
                                        Trace.TraceWarning($"Duplicate serial ID found for atom: {oAtom}");
                                    }
                                }
                                Debug.WriteLine($"Added ATOM: {oAtom}");
                            }
                            break;

                        case "HETATM":
                            {
                                // As HETATMs cannot be considered to either belong to a certain
                                // monomer or strand, they are added to the container directly.
                                var oAtom = ReadAtom(cRead, lineLength);
                                oAtom.HetAtom = true;
                                if (isProteinStructure)
                                {
                                    oBP.Atoms.Add(oAtom);
                                }
                                else
                                {
                                    molecularStructure.Atoms.Add(oAtom);
                                }

                                // Unlike ATOM records, HETATM serials are registered regardless
                                // of the readConnect setting.
                                var isDup = atomNumberMap.ContainsKey(oAtom.Serial.Value);
                                atomNumberMap[oAtom.Serial.Value] = oAtom;
                                if (isDup)
                                {
                                    Trace.TraceWarning($"Duplicate serial ID found for atom: {oAtom}");
                                }

                                Debug.WriteLine($"Added HETATM: {oAtom}");
                            }
                            break;

                        case "TER   ":
                            // Start a new strand: only the running chain letter has to advance.
                            // The strand object itself is created by the ATOM handling above
                            // the first time an atom needs it.
                            chain++;
                            Debug.WriteLine("Added new STRAND");
                            break;

                        case "END   ":
                            {
                                atomNumberMap.Clear();
                                if (isProteinStructure)
                                {
                                    // create bonds and finish the molecule
                                    oSet.Add(oBP);
                                    if (useRebondTool.IsSet)
                                    {
                                        try
                                        {
                                            if (!CreateBondsWithRebondTool(oBP))
                                            {
                                                // Get rid of all potentially created bonds.
                                                Trace.TraceInformation("Bonds could not be created using the RebondTool when PDB file was read.");
                                                oBP.Bonds.Clear();
                                            }
                                        }
                                        catch (Exception exception)
                                        {
                                            // Bond perception is best effort; the atoms are kept.
                                            Trace.TraceInformation("Bonds could not be created when PDB file was read.");
                                            Debug.WriteLine(exception);
                                        }
                                    }
                                }
                                else
                                {
                                    if (useRebondTool.IsSet)
                                    {
                                        CreateBondsWithRebondTool(molecularStructure);
                                    }
                                    oSet.Add(molecularStructure);
                                }
                            }
                            break;

                        case "MODEL ":
                            {
                                // OK, start a new model and save the current one first *if* it contains atoms
                                if (isProteinStructure)
                                {
                                    if (oBP.Atoms.Count > 0)
                                    {
                                        // save the model
                                        oSet.Add(oBP);
                                        oModel.MoleculeSet = oSet;
                                        oSeq.Add(oModel);
                                        // setup a new one
                                        oBP = new PDBPolymer();
                                        oModel = oFile.Builder.NewChemModel();
                                        oSet = oFile.Builder.NewAtomContainerSet();
                                        // avoid duplicate atom warnings
                                        atomNumberMap.Clear();
                                    }
                                }
                                else
                                {
                                    if (molecularStructure.Atoms.Count > 0)
                                    {
                                        // save the model
                                        oSet.Add(molecularStructure);
                                        oModel.MoleculeSet = oSet;
                                        oSeq.Add(oModel);
                                        // setup a new one
                                        molecularStructure = oFile.Builder.NewAtomContainer();
                                        oModel = oFile.Builder.NewChemModel();
                                        oSet = oFile.Builder.NewAtomContainerSet();
                                    }
                                }
                            }
                            break;

                        case "REMARK":
                            {
                                // Remarks are accumulated, one per line, in the Comment property.
                                var comment = oFile.GetProperty<string>(CDKPropertyName.Comment, "");
                                if (lineLength > 12)
                                {
                                    comment = comment + cRead.Substring(11).Trim()
                                              + "\n";
                                    oFile.SetProperty(CDKPropertyName.Comment, comment);
                                }
                                else
                                {
                                    Trace.TraceWarning("REMARK line found without any comment!");
                                }
                            }
                            break;

                        case "COMPND":
                            {
                                // A COMPND line shorter than 10 characters has no payload. Taking
                                // Substring(10) of it would throw an ArgumentOutOfRangeException,
                                // which the handler below would treat as fatal and silently stop
                                // reading the rest of the input.
                                if (cRead.Length >= 10)
                                {
                                    var title = cRead.Substring(10).Trim();
                                    oFile.SetProperty(CDKPropertyName.Title, title);
                                }
                                else
                                {
                                    Trace.TraceWarning("COMPND line found without any compound name!");
                                }
                            }
                            break;

                        case "CONECT":
                            {
                                // Read connectivity information from CONECT records. Only
                                // covalent bonds are dealt with. Perhaps salt bridges
                                // should be dealt with in the same way..?
                                if (!readConnect.IsSet)
                                {
                                    break;
                                }
                                cRead = cRead.Trim();
                                if (cRead.Length < 16)
                                {
                                    Debug.WriteLine($"Skipping unexpected empty CONECT line! : {cRead}");
                                }
                                else
                                {
                                    // After the record name the line is consumed in 5-character
                                    // fields: the first parsable field is the source atom serial,
                                    // every following parsable field is a bonded partner.
                                    int lineIndex = 6;
                                    int atomFromNumber = -1;
                                    int atomToNumber = -1;
                                    var molecule = (isProteinStructure) ? oBP : molecularStructure;
                                    while (lineIndex + 5 <= cRead.Length)
                                    {
                                        var part = cRead.Substring(lineIndex, 5).Trim();
                                        int parsedSerial;
                                        bool isNumber = int.TryParse(part, NumberStyles.Integer, NumberFormatInfo.InvariantInfo, out parsedSerial);
                                        if (atomFromNumber == -1)
                                        {
                                            // An unparsable field leaves the source unset, so the
                                            // next field is tried as the source instead.
                                            if (isNumber)
                                            {
                                                atomFromNumber = parsedSerial;
                                            }
                                        }
                                        else
                                        {
                                            atomToNumber = isNumber ? parsedSerial : -1;
                                            if (atomFromNumber != -1 && atomToNumber != -1)
                                            {
                                                AddBond(molecule, atomFromNumber, atomToNumber);
                                                Debug.WriteLine($"Bonded {atomFromNumber} with {atomToNumber}");
                                            }
                                        }
                                        lineIndex += 5;
                                    }
                                }
                            }
                            break;

                        case "HELIX ":
                            {
                                // HELIX    1 H1A CYS A   11  LYS A   18  1 RESIDUE 18 HAS POSITIVE PHI    1D66  72
                                //           1         2         3         4         5         6         7
                                // 01234567890123456789012345678901234567890123456789012345678901234567890123456789
                                var structure = new PDBStructure
                                {
                                    StructureType = PDBStructure.Helix,
                                    StartChainID = cRead[19],
                                    StartSequenceNumber = int.Parse(cRead.Substring(21, 4).Trim(), NumberFormatInfo.InvariantInfo),
                                    StartInsertionCode = cRead[25],
                                    EndChainID = cRead[31],
                                    EndSequenceNumber = int.Parse(cRead.Substring(33, 4).Trim(), NumberFormatInfo.InvariantInfo),
                                    EndInsertionCode = cRead[37]
                                };
                                oBP.Add(structure);
                            }
                            break;

                        case "SHEET ":
                            {
                                var structure = new PDBStructure
                                {
                                    StructureType = PDBStructure.Sheet,
                                    StartChainID = cRead[21],
                                    StartSequenceNumber = int.Parse(cRead.Substring(22, 4).Trim(), NumberFormatInfo.InvariantInfo),
                                    StartInsertionCode = cRead[26],
                                    EndChainID = cRead[32],
                                    EndSequenceNumber = int.Parse(cRead.Substring(33, 4).Trim(), NumberFormatInfo.InvariantInfo),
                                    EndInsertionCode = cRead[37]
                                };
                                oBP.Add(structure);
                            }
                            break;

                        case "TURN  ":
                            {
                                var structure = new PDBStructure
                                {
                                    StructureType = PDBStructure.Turn,
                                    StartChainID = cRead[19],
                                    StartSequenceNumber = int.Parse(cRead.Substring(20, 4).Trim(), NumberFormatInfo.InvariantInfo),
                                    StartInsertionCode = cRead[24],
                                    EndChainID = cRead[30],
                                    EndSequenceNumber = int.Parse(cRead.Substring(31, 4).Trim(), NumberFormatInfo.InvariantInfo),
                                    EndInsertionCode = cRead[35]
                                };
                                oBP.Add(structure);
                            }
                            break;

                        default:
                            break; // ignore all other commands
                    }
                }
            }
            // NOTE(review): FormatException (e.g. from int.Parse on malformed HELIX/SHEET/TURN
            // fields) and IndexOutOfRangeException (indexing into a too-short line) derive from
            // neither IOException nor ArgumentException, so they still propagate to the caller.
            // Confirm that this is intended.
            catch (Exception e) when (e is IOException || e is ArgumentException)
            {
                Trace.TraceError("Found a problem at line:");
                Trace.TraceError(cRead);
                Trace.TraceError("01234567890123456789012345678901234567890123456789012345678901234567890123456789");
                Trace.TraceError("          1         2         3         4         5         6         7         ");
                Trace.TraceError($"  error: {e.Message}");
                Debug.WriteLine(e);
                Console.Error.WriteLine(e.StackTrace);
            }
            finally
            {
                // Close the input on every path, including when an unexpected exception is
                // on its way to the caller. A failure to close is only logged.
                try
                {
                    oInput.Close();
                }
                catch (Exception closeError)
                {
                    Debug.WriteLine(closeError);
                }
            }

            // Set all the dependencies
            oModel.MoleculeSet = oSet;
            oSeq.Add(oModel);
            oFile.Add(oSeq);

            return oFile;
        }