Exemplo n.º 1
0
        /// <summary>
        /// Build CDK BitSetFingerprints from an AtomContainer
        /// including the overall fingerprint and a fingerprint for each fragment
        /// </summary>
        /// <param name="molfile"></param>
        /// <param name="fpTypeInt"></param>
        /// <param name="fpSubtype"></param>
        /// <param name="fpLen"></param>
        /// <returns></returns>

        public static List <BitSetFingerprint> BuildBitSetFingerprints(
            string molfile,
            bool includeOverallFingerprint,
            FingerprintType fpType,
            int fpSubtype = -1,
            int fpLen     = -1)
        {
            IAtomContainer    mol2;
            BitSetFingerprint fp;
            string            smiles;
            int fi, fi2;

            IAtomContainer mol = MolfileToAtomContainer(molfile);

            List <BitSetFingerprint> fpList = new List <BitSetFingerprint>();

            int pseudoCount = RemovePseudoAtoms(mol);

            //mol = AtomContainerToSmilesAndBack(mol, out smiles); // need this for kekulization?

            mol = RemoveIsotopesStereoExplicitHydrogens(mol);          // additional normalization for fingerprint

            List <IAtomContainer> frags = FragmentMolecule(mol, true); // get fragments filtering out small and common frags

            if (includeOverallFingerprint && frags.Count > 1)          // include overall fingerprint for multiple fragments
            {
                fp =                                                   // generate a fingerprint
                     CdkFingerprint.BuildBitSetFingerprint(mol, fpType, fpSubtype, fpLen);

                fpList.Add(fp);
            }

            for (fi = 0; fi < frags.Count; fi++)
            {
                mol2 = frags[fi];

                fp = CdkFingerprint.BuildBitSetFingerprint(
                    mol2,
                    fpType,
                    fpSubtype,
                    fpLen);

                for (fi2 = 0; fi2 < fpList.Count; fi2++)                 // see if a dup fp
                {
                    if (fp.Equals(fpList[fi2]))
                    {
                        break;
                    }
                }
                if (fi2 < fpList.Count)
                {
                    continue;                                     // skip if dup
                }
                fpList.Add(fp);
            }

            return(fpList);
        }
Exemplo n.º 2
0
        /// <summary>
        /// Build a CDK BitSetFingerprint for the largest fragment in an AtomContainer
        /// </summary>
        /// <param name="mol"></param>
        /// <param name="fpTypeInt"></param>
        /// <param name="fpSubtype"></param>
        /// <param name="fpLen"></param>
        /// <returns></returns>

        public static BitSetFingerprint BuildBitSetFingerprintForLargestFragment(
            IAtomContainer mol,
            FingerprintType fpType,
            int fpSubtype = -1,
            int fpLen     = -1)
        {
            string smiles;

            mol = GetLargestMoleculeFragment(mol);

            int pseudoCount = RemovePseudoAtoms(mol);

            //mol = AtomContainerToSmilesAndBack(mol, out smiles); // need this for kekulization

            mol = RemoveIsotopesStereoExplicitHydrogens(mol);             // additional normalization for fingerprint

            BitSetFingerprint bfp = CdkFingerprint.BuildBitSetFingerprint(
                mol,
                fpType,
                fpSubtype,
                fpLen);

            return(bfp);
        }
Exemplo n.º 3
0
        static INativeMolMx CdkMolUtil => StaticCdkMol.I;         // static molecule shortcut for utility methods

        /// <summary>
        /// Build UniChem Data
        /// Note that this routine takes about 14 secs the first time it's called, faster thereafter.
        /// </summary>
        /// <param name="mol"></param>
        /// <returns></returns>

        public static UniChemData BuildUniChemData(
            IAtomContainer mol)
        {
            IAtomContainer mol2;
            InChIGenerator ig = null;
            string         molSmiles, fragSmiles = "", molFile = "";
            int            acc, fi, ci;

            //if (Lex.StartsWith(molString, "InChI="))
            //{
            //	sourceInchi = molString;
            //	mol = CdkMol.InChIToAtomContainer(sourceInchi);
            //}

            //else // assume molfile otherwise
            //{
            //	molfile = molString;
            //	mol = CdkMol.MolfileToAtomContainer(molfile);
            //}

            if (mol.Atoms.Count <= 1)
            {
                throw new Exception("Atom count <= 1");
            }

            int pseudoCount = CdkMol.RemovePseudoAtoms(mol);

            mol = CdkMol.AtomContainerToSmilesAndBack(mol, out molSmiles);

            InChIGeneratorFactory igf = InChIGeneratorFactory.Instance;

            try
            {
                //string options = "/KET /15T"; // options to include keto-enol and 1,5-tautomerism (not recognized by CDK)
                ig = igf.GetInChIGenerator(mol);                 //, options);
            }

            catch (Exception ex)             // may fail for some complex mols (e.g. CorpId 12345, a MDL V3000 mol with a CF3 Sgroup/alias)
            {
                throw new Exception(ex.Message, ex);
            }

            //{
            //	try // try to simplify the molfile so that InChI can handle it
            //	{
            //		//if (Lex.IsUndefined(molfile)) throw ex;
            //		string molfile2 = SimplifyMolfileForInChIGeneration(molile);
            //		mol = CdkMol.MolfileToAtomContainer(molfile2);
            //		mol = CdkMol.AtomContainerToSmilesAndBack(mol, out molSmiles);
            //		ig = igf.getInChIGenerator(mol);
            //	}
            //	catch (Exception ex2)
            //	{
            //		throw new Exception(ex2.Message, ex);
            //	}
            //}

            if (!IsAcceptableInchiStatus(ig))
            {
                string errMsg = "InChI generation " + ig.ReturnStatus + ": " + ig.Message;
                molFile = CdkMol.AtomContainerToMolfile(mol);                 // debug
                throw new Exception(errMsg);
            }

            // Populate the UniChem object

            UniChemData icd = new UniChemData();

            //icd.Molfile = molfile;
            icd.AtomContainer = mol;
            icd.InChIString   = ig.InChI;
            icd.InChIKey      = ig.GetInChIKey();
            icd.CanonSmiles   = molSmiles;

            // Build and store fingerprint

            mol = CdkMol.InChIToAtomContainer(icd.InChIString);

            //int hydRemovedCnt = CdkMol.RemoveHydrogensBondedToPositiveNitrogens(mol);
            mol = CdkMol.RemoveIsotopesStereoExplicitHydrogens(mol); // additional normalization for fingerprint

            BitSetFingerprint fp =                                   // generate a fingerprint
                                   CdkFingerprint.BuildBitSetFingerprint(mol, FingerprintType.MACCS, -1, -1);

            icd.Fingerprint = fp;

            if (ConnectivityChecker.IsConnected(mol))
            {
                return(icd);                                                  // single fragment
            }
            //string mf = CdkMol.GetMolecularFormula(mol);

            //AtomContainerSet acs = (AtomContainerSet)ConnectivityChecker.partitionIntoMolecules(mol);
            List <IAtomContainer> frags = CdkMol.FragmentMolecule(mol, true);            // get fragments filtering out small and common frags

            for (fi = 0; fi < frags.Count; fi++)
            {
                mol2 = frags[fi];

                int atomCnt = mol2.Atoms.Count;
                if (atomCnt <= 1)
                {
                    continue;
                }

                try
                {
                    mol2 = CdkMol.AtomContainerToSmilesAndBack(mol2, out fragSmiles);
                }
                catch (Exception ex)
                {
                    AtomContainerToSmilesAndBackErrorCount++;                     // just count error and ignore
                }

                ig = igf.GetInChIGenerator(mol2);
                if (!IsAcceptableInchiStatus(ig))
                {
                    continue;
                }

                string childInChIString = ig.InChI;
                string childInChIKey    = ig.GetInChIKey();
                string childFIKHB       = UniChemUtil.GetFIKHB(childInChIKey);

                mol2 = CdkMol.InChIToAtomContainer(childInChIString);      // convert from inchi
                mol2 = CdkMol.RemoveIsotopesStereoExplicitHydrogens(mol2); // additional normalization for fingerprint

                fp =                                                       // generate a fingerprint for the fragment
                     CdkFingerprint.BuildBitSetFingerprint(mol2, FingerprintType.MACCS, -1, -1);

                for (ci = 0; ci < icd.Children.Count; ci++)                 // see if a dup child
                {
                    if (icd.Children[ci].ChildFIKHB == childFIKHB)
                    {
                        break;
                    }
                }
                if (ci < icd.Children.Count)
                {
                    continue;                                          // skip if dup
                }
                UniChemFIKHBHierarchy icdChild = new UniChemFIKHBHierarchy();
                icdChild.ParentFIKHB = icd.GetFIKHB();
                icdChild.ChildFIKHB  = childFIKHB;
                icdChild.InChIString = childInChIString;
                icdChild.CanonSmiles = fragSmiles;

                icdChild.Fingerprint = fp;

                icd.Children.Add(icdChild);
            }

            return(icd);
        }
Exemplo n.º 4
0
        /// <summary>
        /// BuildFingerprints
        /// </summary>
        public static void BuildFingerprints(
            UniChemData icd)
        {
            IAtomContainer mol, mol2;
            InChIGenerator ig = null;
            int            acc, aci, ci;

            icd.Children.Clear();

            string parentFIKHB = icd.GetFIKHB();

            DateTime t0 = DateTime.Now;

            mol = CdkMol.InChIToAtomContainer(icd.InChIString);

            icd.CanonSmiles = CdkMol.AtomContainerToSmiles(mol);

            InChIToAtomContainerTime += TimeOfDay.Delta(ref t0);

            BitSetFingerprint fp =             // generate a fingerprint
                                   CdkFingerprint.BuildBitSetFingerprint(mol, FingerprintType.MACCS, -1, -1);

            BuildFinterprintTime1 += TimeOfDay.Delta(ref t0);

            icd.Fingerprint = fp;

            if (ConnectivityChecker.IsConnected(mol))
            {
                return;                                                   // single fragment
            }
            InChIGeneratorFactory igf = InChIGeneratorFactory.Instance;

            AtomContainerSet acs = (AtomContainerSet)ConnectivityChecker.PartitionIntoMolecules(mol);

            PartitionIntoMoleculesTime += TimeOfDay.Delta(ref t0);

            acc = acs.Count;

            for (aci = 0; aci < acc; aci++)
            {
                mol2 = acs[aci];
                GetAtomContainerTime += TimeOfDay.Delta(ref t0);

                ig = igf.GetInChIGenerator(mol2);
                if (!IsAcceptableInchiStatus(ig))
                {
                    continue;
                }

                string childKey   = ig.GetInChIKey();
                string childFIKHB = UniChemUtil.GetFIKHB(childKey);
                InChIGeneratorTime += TimeOfDay.Delta(ref t0);

                fp =                 // generate a fingerprint for the fragment
                     CdkFingerprint.BuildBitSetFingerprint(mol2, FingerprintType.MACCS, -1, -1);

                BuildFinterprintTime2 += TimeOfDay.Delta(ref t0);

                for (ci = 0; ci < icd.Children.Count; ci++)                 // see if a dup child
                {
                    if (icd.Children[ci].ChildFIKHB == childFIKHB)
                    {
                        break;
                    }
                }
                if (ci < icd.Children.Count)
                {
                    continue;                                          // skip if dup
                }
                UniChemFIKHBHierarchy fikhbHier = new UniChemFIKHBHierarchy();
                fikhbHier.ParentFIKHB = parentFIKHB;
                fikhbHier.ChildFIKHB  = childFIKHB;
                fikhbHier.CanonSmiles = CdkMol.AtomContainerToSmiles(mol2);
                fikhbHier.Fingerprint = fp;

                icd.Children.Add(fikhbHier);
            }

            return;
        }