/// <summary>
/// Public method to get tautomers for an input molecule, based on the InChI which will be calculated by the .NET port of JNI-InChI.
/// </summary>
/// <param name="mol">molecule for which to generate tautomers</param>
/// <returns>a list of tautomers, if any</returns>
/// <exception cref="CDKException">thrown when the InChI generator yields a null InChI for <paramref name="mol"/></exception>
public ICollection<IAtomContainer> GetTautomers(IAtomContainer mol)
{
    // Translate the enabled option flags into the switches passed to the InChI generator.
    string opt = "";
    if ((flags & Options.KetoEnol) != 0)
    {
        opt += " -KET";
    }
    if ((flags & Options.OneFiveShift) != 0)
    {
        opt += " -15T";
    }

    InChIGenerator gen = InChIGeneratorFactory.Instance.GetInChIGenerator(mol, opt);
    string inchi = gen.InChI;

    // Fail fast: check the InChI before the auxiliary info is parsed, so that a failed
    // generation is always reported as the documented CDKException rather than as
    // whatever the aux-info parser does with the output of a failed run.
    if (inchi == null)
    {
        throw new CDKException($"{typeof(InChIGenerator)} failed to create an InChI for the provided molecule, InChI -> null.");
    }

    // One slot per atom of the input molecule; handed to ParseAuxInfo together with the aux info.
    string aux = gen.AuxInfo;
    long[] amap = new long[mol.Atoms.Count];
    InChINumbersTools.ParseAuxInfo(aux, amap);

    return GetTautomers(mol, inchi, amap);
}
// SimplifyMolfileForInChIGeneration (disabled helper, kept for reference).
// Plain comments are used here so this text is not picked up as XML documentation
// for the method that follows.
//static string SimplifyMolfileForInChIGeneration(string molfile)
//{
//    string smiles = CdkMol.StructureConverter.MolfileStringToSmilesString(molfile); // convert to smiles
//    string molfile2 = CdkMol.StructureConverter.SmilesStringToMolfileString(smiles); // and back to V2000 molfile without Sgroups
//    return molfile2;
//}

/// <summary>
/// IsAcceptableInchiStatus:
/// reports whether the return status of an InChI generator is acceptable.
/// <c>InChIReturnCode.Ok</c> and <c>InChIReturnCode.Warning</c> are accepted; any other code is rejected.
/// </summary>
/// <param name="ig">generator whose <c>ReturnStatus</c> is inspected</param>
/// <returns>true if the status is Ok or Warning, false otherwise</returns>
static bool IsAcceptableInchiStatus(InChIGenerator ig)
{
    InChIReturnCode inchiStatus = ig.ReturnStatus;
    return inchiStatus == InChIReturnCode.Ok || inchiStatus == InChIReturnCode.Warning;
}
// SimplifyMolfileForInChIGeneration (disabled helper, kept for reference).
// Plain comments are used here so this text is not picked up as XML documentation
// for the method that follows.
//static string SimplifyMolfileForInChIGeneration(string molfile)
//{
//    string smiles = CdkMol.StructureConverter.MolfileStringToSmilesString(molfile); // convert to smiles
//    string molfile2 = CdkMol.StructureConverter.SmilesStringToMolfileString(smiles); // and back to V2000 molfile without Sgroups
//    return molfile2;
//}

/// <summary>
/// IsAcceptableInchiStatus:
/// reports whether the return status of an InChI generator is acceptable.
/// <c>INCHI_RET.OKAY</c> and <c>INCHI_RET.WARNING</c> are accepted; any other code is rejected.
/// </summary>
/// <param name="ig">generator whose return status is inspected</param>
/// <returns>true if the status is OKAY or WARNING, false otherwise</returns>
static bool IsAcceptableInchiStatus(InChIGenerator ig)
{
    INCHI_RET inchiStatus = ig.getReturnStatus();
    return inchiStatus == INCHI_RET.OKAY || inchiStatus == INCHI_RET.WARNING;
}
static INativeMolMx CdkMolUtil => StaticCdkMol.I; // static molecule shortcut for utility methods

/// <summary>
/// Build UniChem Data
/// Note that this routine takes about 14 secs the first time it's called, faster thereafter.
/// </summary>
/// <remarks>
/// Steps, as performed below:
/// 1. Pseudo atoms are removed from the input and it is round-tripped through SMILES.
/// 2. An InChI / InChIKey is generated; a status other than Ok/Warning is an error.
/// 3. A MACCS fingerprint is built from a structure regenerated from the InChI.
/// 4. If that structure is not connected, each fragment with more than one atom gets its own
///    InChI, FIKHB and fingerprint and is stored as a child, skipping duplicate FIKHBs.
/// </remarks>
/// <param name="mol">molecule to process; must contain more than one atom.
/// NOTE(review): RemovePseudoAtoms is called on this instance, so the caller's object is presumably modified - confirm.</param>
/// <returns>UniChemData with AtomContainer, InChIString, InChIKey, CanonSmiles, Fingerprint and, for multi-fragment structures, Children filled in</returns>
/// <exception cref="Exception">atom count is 1 or less, the InChI generator could not be created, or the InChI status is not acceptable</exception>
public static UniChemData BuildUniChemData(
    IAtomContainer mol)
{
    string molSmiles;

    //if (Lex.StartsWith(molString, "InChI="))
    //{
    //    sourceInchi = molString;
    //    mol = CdkMol.InChIToAtomContainer(sourceInchi);
    //}
    //else // assume molfile otherwise
    //{
    //    molfile = molString;
    //    mol = CdkMol.MolfileToAtomContainer(molfile);
    //}

    if (mol.Atoms.Count <= 1)
    {
        throw new Exception("Atom count <= 1");
    }

    CdkMol.RemovePseudoAtoms(mol); // the returned count is not needed here
    IAtomContainer normalizedMol = CdkMol.AtomContainerToSmilesAndBack(mol, out molSmiles);

    InChIGeneratorFactory igf = InChIGeneratorFactory.Instance;
    InChIGenerator ig;

    try
    {
        //string options = "/KET /15T"; // options to include keto-enol and 1,5-tautomerism (not recognized by CDK)
        ig = igf.GetInChIGenerator(normalizedMol); //, options);
    }
    catch (Exception ex) // may fail for some complex mols (e.g. CorpId 12345, a MDL V3000 mol with a CF3 Sgroup/alias)
    {
        throw new Exception(ex.Message, ex);
    }

    //{
    //    try // try to simplify the molfile so that InChI can handle it
    //    {
    //        //if (Lex.IsUndefined(molfile)) throw ex;
    //        string molfile2 = SimplifyMolfileForInChIGeneration(molile);
    //        mol = CdkMol.MolfileToAtomContainer(molfile2);
    //        mol = CdkMol.AtomContainerToSmilesAndBack(mol, out molSmiles);
    //        ig = igf.getInChIGenerator(mol);
    //    }
    //    catch (Exception ex2)
    //    {
    //        throw new Exception(ex2.Message, ex);
    //    }
    //}

    if (!IsAcceptableInchiStatus(ig))
    {
        // Throw the status error directly; no extra conversions are done first so that
        // nothing can fail ahead of (and hide) the real InChI error.
        string errMsg = "InChI generation " + ig.ReturnStatus + ": " + ig.Message;
        throw new Exception(errMsg);
    }

    // Populate the UniChem object

    UniChemData icd = new UniChemData();
    //icd.Molfile = molfile;
    icd.AtomContainer = normalizedMol;
    icd.InChIString = ig.InChI;
    icd.InChIKey = ig.GetInChIKey();
    icd.CanonSmiles = molSmiles;

    // Build and store fingerprint

    IAtomContainer fpMol = CdkMol.InChIToAtomContainer(icd.InChIString);

    //int hydRemovedCnt = CdkMol.RemoveHydrogensBondedToPositiveNitrogens(mol);

    fpMol = CdkMol.RemoveIsotopesStereoExplicitHydrogens(fpMol); // additional normalization for fingerprint

    icd.Fingerprint = // generate a fingerprint
        CdkFingerprint.BuildBitSetFingerprint(fpMol, FingerprintType.MACCS, -1, -1);

    if (ConnectivityChecker.IsConnected(fpMol))
    {
        return icd; // single fragment
    }

    //string mf = CdkMol.GetMolecularFormula(mol);
    //AtomContainerSet acs = (AtomContainerSet)ConnectivityChecker.partitionIntoMolecules(mol);

    List<IAtomContainer> frags = CdkMol.FragmentMolecule(fpMol, true); // get fragments filtering out small and common frags

    foreach (IAtomContainer frag in frags)
    {
        if (frag.Atoms.Count <= 1)
        {
            continue;
        }

        IAtomContainer fragMol = frag;
        string fragSmiles;

        try
        {
            fragMol = CdkMol.AtomContainerToSmilesAndBack(frag, out fragSmiles);
        }
        catch (Exception)
        {
            AtomContainerToSmilesAndBackErrorCount++; // just count error and ignore

            // No SMILES is available for this fragment. Clear the value so that the SMILES
            // of a previously processed fragment can never be stored on this child.
            fragSmiles = "";
        }

        InChIGenerator fragIg = igf.GetInChIGenerator(fragMol);
        if (!IsAcceptableInchiStatus(fragIg))
        {
            continue;
        }

        string childInChIString = fragIg.InChI;
        string childInChIKey = fragIg.GetInChIKey();
        string childFIKHB = UniChemUtil.GetFIKHB(childInChIKey);

        // See if a dup child. Checked before the fingerprint is built so that
        // duplicates are skipped without doing the conversion/fingerprint work.
        bool isDuplicate = false;
        for (int ci = 0; ci < icd.Children.Count; ci++)
        {
            if (icd.Children[ci].ChildFIKHB == childFIKHB)
            {
                isDuplicate = true;
                break;
            }
        }

        if (isDuplicate)
        {
            continue; // skip if dup
        }

        IAtomContainer fragFpMol = CdkMol.InChIToAtomContainer(childInChIString); // convert from inchi
        fragFpMol = CdkMol.RemoveIsotopesStereoExplicitHydrogens(fragFpMol); // additional normalization for fingerprint

        BitSetFingerprint fragFp = // generate a fingerprint for the fragment
            CdkFingerprint.BuildBitSetFingerprint(fragFpMol, FingerprintType.MACCS, -1, -1);

        UniChemFIKHBHierarchy icdChild = new UniChemFIKHBHierarchy();
        icdChild.ParentFIKHB = icd.GetFIKHB();
        icdChild.ChildFIKHB = childFIKHB;
        icdChild.InChIString = childInChIString;
        icdChild.CanonSmiles = fragSmiles;
        icdChild.Fingerprint = fragFp;

        icd.Children.Add(icdChild);
    }

    return icd;
}
/// <summary>
/// BuildFingerprints:
/// rebuilds the canonical SMILES, the MACCS fingerprint and the child (fragment) entries
/// of an existing UniChemData from its stored InChI string.
/// </summary>
/// <remarks>
/// The existing children are cleared first. When the structure regenerated from the InChI is
/// connected, only the parent SMILES and fingerprint are set. Otherwise the structure is
/// partitioned and one child is added per fragment whose InChI status is acceptable and whose
/// FIKHB is not already present. Elapsed time of each stage is added to the static timing counters.
/// </remarks>
/// <param name="icd">UniChem data to update in place; its InChIString is the input</param>
public static void BuildFingerprints(
    UniChemData icd)
{
    icd.Children.Clear();

    string parentFIKHB = icd.GetFIKHB();

    DateTime stamp = DateTime.Now;

    // Parent structure: regenerate from InChI and derive the SMILES
    IAtomContainer parentMol = CdkMol.InChIToAtomContainer(icd.InChIString);
    icd.CanonSmiles = CdkMol.AtomContainerToSmiles(parentMol);
    InChIToAtomContainerTime += TimeOfDay.Delta(ref stamp);

    // Parent fingerprint
    BitSetFingerprint parentFp =
        CdkFingerprint.BuildBitSetFingerprint(parentMol, FingerprintType.MACCS, -1, -1);
    BuildFinterprintTime1 += TimeOfDay.Delta(ref stamp);
    icd.Fingerprint = parentFp;

    if (ConnectivityChecker.IsConnected(parentMol))
    {
        return; // single fragment, no children to build
    }

    InChIGeneratorFactory factory = InChIGeneratorFactory.Instance;

    AtomContainerSet fragments = (AtomContainerSet)ConnectivityChecker.PartitionIntoMolecules(parentMol);
    PartitionIntoMoleculesTime += TimeOfDay.Delta(ref stamp);

    int fragmentCount = fragments.Count;

    for (int fragIdx = 0; fragIdx < fragmentCount; fragIdx++)
    {
        IAtomContainer fragment = fragments[fragIdx];
        GetAtomContainerTime += TimeOfDay.Delta(ref stamp);

        InChIGenerator generator = factory.GetInChIGenerator(fragment);
        if (!IsAcceptableInchiStatus(generator))
        {
            continue;
        }

        string childFIKHB = UniChemUtil.GetFIKHB(generator.GetInChIKey());
        InChIGeneratorTime += TimeOfDay.Delta(ref stamp);

        // Fingerprint for the fragment
        BitSetFingerprint fragmentFp =
            CdkFingerprint.BuildBitSetFingerprint(fragment, FingerprintType.MACCS, -1, -1);
        BuildFinterprintTime2 += TimeOfDay.Delta(ref stamp);

        // Skip the fragment if a child with the same FIKHB was already added
        bool alreadyPresent = false;
        for (int childIdx = 0; childIdx < icd.Children.Count; childIdx++)
        {
            if (icd.Children[childIdx].ChildFIKHB == childFIKHB)
            {
                alreadyPresent = true;
                break;
            }
        }

        if (alreadyPresent)
        {
            continue;
        }

        UniChemFIKHBHierarchy child = new UniChemFIKHBHierarchy();
        child.ParentFIKHB = parentFIKHB;
        child.ChildFIKHB = childFIKHB;
        child.CanonSmiles = CdkMol.AtomContainerToSmiles(fragment);
        child.Fingerprint = fragmentFp;

        icd.Children.Add(child);
    }
}