Beispiel #1
0
        /// <summary>
        /// SimplifyMolfileForInChIGeneration
        /// </summary>
        /// <param name="molfile"></param>
        /// <returns></returns>

        //static string SimplifyMolfileForInChIGeneration(string molfile)
        //{
        //	string smiles = CdkMol.StructureConverter.MolfileStringToSmilesString(molfile); // convert to smiles
        //	string molfile2 = CdkMol.StructureConverter.SmilesStringToMolfileString(smiles); // and back to V2000 molfile without Sgroups
        //	return molfile2;
        //}

        /// <summary>
        /// IsAcceptableInchiStatus
        /// </summary>
        /// <param name="ig"></param>
        /// <returns></returns>

        static bool IsAcceptableInchiStatus(InChIGenerator ig)
        {
            INCHI_RET inchiStatus = ig.getReturnStatus();

            if (inchiStatus == INCHI_RET.OKAY)
            {
                return(true);
            }

            string msg     = ig.getMessage();
            string log     = ig.getLog();
            string auxInfo = ig.getAuxInfo();

            if (inchiStatus == INCHI_RET.WARNING)
            {
                return(true);
            }

            else
            {
                return(false);
            }
        }
Beispiel #2
0
        static ICdkMol CdkMolUtil => StaticCdkMol.I;         // static molecule shortcut for utility methods

        /// <summary>
        /// Build UniChem Data
        /// Note that this routine takes about 14 secs the first time it's called, faster thereafter.
        /// </summary>
        /// <param name="mol"></param>
        /// <returns></returns>

        public static UniChemData BuildUniChemData(
            IAtomContainer mol)
        {
            IAtomContainer mol2;
            InChIGenerator ig = null;
            string         molSmiles, fragSmiles = "", molFile = "";
            int            acc, fi, ci;

            //if (Lex.StartsWith(molString, "InChI="))
            //{
            //	sourceInchi = molString;
            //	mol = CdkMol.InChIToAtomContainer(sourceInchi);
            //}

            //else // assume molfile otherwise
            //{
            //	molfile = molString;
            //	mol = CdkMol.MolfileToAtomContainer(molfile);
            //}

            if (mol.getAtomCount() <= 1)
            {
                throw new Exception("Atom count <= 1");
            }

            int pseudoCount = CdkMol.RemovePseudoAtoms(mol);

            mol = CdkMol.AtomContainerToSmilesAndBack(mol, out molSmiles);

            InChIGeneratorFactory igf = InChIGeneratorFactory.getInstance();

            try
            {
                //string options = "/KET /15T"; // options to include keto-enol and 1,5-tautomerism (not recognized by CDK)
                ig = igf.getInChIGenerator(mol);                 //, options);
            }

            catch (Exception ex)             // may fail for some complex mols (e.g. CorpId 12345, a MDL V3000 mol with a CF3 Sgroup/alias)
            {
                throw new Exception(ex.Message, ex);
            }

            //{
            //	try // try to simplify the molfile so that InChI can handle it
            //	{
            //		//if (Lex.IsUndefined(molfile)) throw ex;
            //		string molfile2 = SimplifyMolfileForInChIGeneration(molile);
            //		mol = CdkMol.MolfileToAtomContainer(molfile2);
            //		mol = CdkMol.AtomContainerToSmilesAndBack(mol, out molSmiles);
            //		ig = igf.getInChIGenerator(mol);
            //	}
            //	catch (Exception ex2)
            //	{
            //		throw new Exception(ex2.Message, ex);
            //	}
            //}

            if (!IsAcceptableInchiStatus(ig))
            {
                string errMsg = "InChI generation " + ig.getReturnStatus() + ": " + ig.getMessage();
                molFile = CdkMol.AtomContainerToMolfile(mol);                 // debug
                throw new Exception(errMsg);
            }

            // Populate the UniChem object

            UniChemData icd = new UniChemData();

            //icd.Molfile = molfile;
            icd.AtomContainer = mol;
            icd.InChIString   = ig.getInchi();
            icd.InChIKey      = ig.getInchiKey();
            icd.CanonSmiles   = molSmiles;

            // Build and store fingerprint

            mol = CdkMol.InChIToAtomContainer(icd.InChIString);

            //int hydRemovedCnt = CdkMol.RemoveHydrogensBondedToPositiveNitrogens(mol);
            mol = CdkMol.RemoveIsotopesStereoExplicitHydrogens(mol); // additional normalization for fingerprint

            BitSetFingerprint fp =                                   // generate a fingerprint
                                   CdkFingerprint.BuildBitSetFingerprint(mol, FingerprintType.MACCS, -1, -1);

            icd.Fingerprint = fp;

            if (ConnectivityChecker.isConnected(mol))
            {
                return(icd);                                                  // single fragment
            }
            //string mf = CdkMol.GetMolecularFormula(mol);

            //AtomContainerSet acs = (AtomContainerSet)ConnectivityChecker.partitionIntoMolecules(mol);
            List <IAtomContainer> frags = CdkMol.FragmentMolecule(mol, true);            // get fragments filtering out small and common frags

            for (fi = 0; fi < frags.Count; fi++)
            {
                mol2 = frags[fi];

                int atomCnt = mol2.getAtomCount();
                if (atomCnt <= 1)
                {
                    continue;
                }

                try
                {
                    mol2 = CdkMol.AtomContainerToSmilesAndBack(mol2, out fragSmiles);
                }
                catch (Exception ex)
                {
                    AtomContainerToSmilesAndBackErrorCount++;                     // just count error and ignore
                }

                ig = igf.getInChIGenerator(mol2);
                if (!IsAcceptableInchiStatus(ig))
                {
                    continue;
                }

                string childInChIString = ig.getInchi();
                string childInChIKey    = ig.getInchiKey();
                string childFIKHB       = UniChemUtil.GetFIKHB(childInChIKey);

                mol2 = CdkMol.InChIToAtomContainer(childInChIString);      // convert from inchi
                mol2 = CdkMol.RemoveIsotopesStereoExplicitHydrogens(mol2); // additional normalization for fingerprint

                fp =                                                       // generate a fingerprint for the fragment
                     CdkFingerprint.BuildBitSetFingerprint(mol2, FingerprintType.MACCS, -1, -1);

                for (ci = 0; ci < icd.Children.Count; ci++)                 // see if a dup child
                {
                    if (icd.Children[ci].ChildFIKHB == childFIKHB)
                    {
                        break;
                    }
                }
                if (ci < icd.Children.Count)
                {
                    continue;                                          // skip if dup
                }
                UniChemFIKHBHierarchy icdChild = new UniChemFIKHBHierarchy();
                icdChild.ParentFIKHB = icd.GetFIKHB();
                icdChild.ChildFIKHB  = childFIKHB;
                icdChild.InChIString = childInChIString;
                icdChild.CanonSmiles = fragSmiles;

                icdChild.Fingerprint = fp;

                icd.Children.Add(icdChild);
            }

            return(icd);
        }