// SimplifyMolfileForInChIGeneration (disabled; kept for reference).
// Round-trips a molfile through SMILES to obtain a V2000 molfile without Sgroups.
//
//static string SimplifyMolfileForInChIGeneration(string molfile)
//{
//    string smiles = CdkMol.StructureConverter.MolfileStringToSmilesString(molfile); // convert to smiles
//    string molfile2 = CdkMol.StructureConverter.SmilesStringToMolfileString(smiles); // and back to V2000 molfile without Sgroups
//    return molfile2;
//}

/// <summary>
/// Decide whether the result held by an InChI generator is usable.
/// </summary>
/// <param name="ig">Generator whose return status is inspected</param>
/// <returns>
/// True when the return status is <c>INCHI_RET.OKAY</c> or <c>INCHI_RET.WARNING</c>,
/// false for every other status.
/// </returns>
static bool IsAcceptableInchiStatus(InChIGenerator ig)
{
    INCHI_RET status = ig.getReturnStatus();

    if (status != INCHI_RET.OKAY)
    {
        // Diagnostic details for anything other than a clean result. They are not
        // used in the decision below; they are fetched so they can be inspected
        // in a debugger.
        string message = ig.getMessage();
        string logText = ig.getLog();
        string auxiliaryInfo = ig.getAuxInfo();

        // A warning still yields a usable InChI; anything else is rejected.
        return status == INCHI_RET.WARNING;
    }

    return true;
}
static ICdkMol CdkMolUtil => StaticCdkMol.I; // static molecule shortcut for utility methods

/// <summary>
/// Build UniChem Data: InChI string and key, canonical SMILES and a MACCS fingerprint
/// for the supplied molecule and, when the molecule is made of several disconnected
/// fragments, one child entry per distinct fragment.
/// Note that this routine takes about 14 secs the first time it's called, faster thereafter.
/// </summary>
/// <param name="mol">
/// Molecule to process. It must contain more than one atom. It is passed to
/// <c>CdkMol.RemovePseudoAtoms</c> before any further processing.
/// </param>
/// <returns>
/// A populated <c>UniChemData</c>. <c>Children</c> only receives entries when the
/// normalized molecule is not a single connected fragment.
/// </returns>
/// <exception cref="Exception">
/// Thrown when the atom count is &lt;= 1, when the InChI generator cannot be created
/// for the molecule, or when InChI generation ends with a status other than OKAY or WARNING.
/// </exception>
public static UniChemData BuildUniChemData(
    IAtomContainer mol)
{
    InChIGenerator ig = null;
    string molSmiles;

    //if (Lex.StartsWith(molString, "InChI="))
    //{
    //    sourceInchi = molString;
    //    mol = CdkMol.InChIToAtomContainer(sourceInchi);
    //}

    //else // assume molfile otherwise
    //{
    //    molfile = molString;
    //    mol = CdkMol.MolfileToAtomContainer(molfile);
    //}

    if (mol.getAtomCount() <= 1)
    {
        throw new Exception("Atom count <= 1");
    }

    CdkMol.RemovePseudoAtoms(mol); // the returned count of removed atoms is not needed here

    mol = CdkMol.AtomContainerToSmilesAndBack(mol, out molSmiles);

    InChIGeneratorFactory igf = InChIGeneratorFactory.getInstance();
    try
    {
        //string options = "/KET /15T"; // options to include keto-enol and 1,5-tautomerism (not recognized by CDK)
        ig = igf.getInChIGenerator(mol); //, options);
    }

    catch (Exception ex) // may fail for some complex mols (e.g. CorpId 12345, a MDL V3000 mol with a CF3 Sgroup/alias)
    {
        throw new Exception(ex.Message, ex);
    }

    //{
    //    try // try to simplify the molfile so that InChI can handle it
    //    {
    //        //if (Lex.IsUndefined(molfile)) throw ex;
    //        string molfile2 = SimplifyMolfileForInChIGeneration(molile);
    //        mol = CdkMol.MolfileToAtomContainer(molfile2);
    //        mol = CdkMol.AtomContainerToSmilesAndBack(mol, out molSmiles);
    //        ig = igf.getInChIGenerator(mol);
    //    }
    //    catch (Exception ex2)
    //    {
    //        throw new Exception(ex2.Message, ex);
    //    }
    //}

    if (!IsAcceptableInchiStatus(ig))
    {
        string errMsg = "InChI generation " + ig.getReturnStatus() + ": " + ig.getMessage();
        throw new Exception(errMsg);
    }

    // Populate the UniChem object

    UniChemData icd = new UniChemData();
    //icd.Molfile = molfile;
    icd.AtomContainer = mol;
    icd.InChIString = ig.getInchi();
    icd.InChIKey = ig.getInchiKey();
    icd.CanonSmiles = molSmiles;

    // Build and store fingerprint (from a molecule rebuilt from the InChI string)

    mol = CdkMol.InChIToAtomContainer(icd.InChIString);

    //int hydRemovedCnt = CdkMol.RemoveHydrogensBondedToPositiveNitrogens(mol);

    mol = CdkMol.RemoveIsotopesStereoExplicitHydrogens(mol); // additional normalization for fingerprint

    BitSetFingerprint fp = // generate a fingerprint
        CdkFingerprint.BuildBitSetFingerprint(mol, FingerprintType.MACCS, -1, -1);

    icd.Fingerprint = fp;

    if (ConnectivityChecker.isConnected(mol))
    {
        return icd; // single fragment
    }

    //string mf = CdkMol.GetMolecularFormula(mol);

    //AtomContainerSet acs = (AtomContainerSet)ConnectivityChecker.partitionIntoMolecules(mol);

    List<IAtomContainer> frags = CdkMol.FragmentMolecule(mol, true); // get fragments filtering out small and common frags

    for (int fi = 0; fi < frags.Count; fi++)
    {
        IAtomContainer fragMol = frags[fi];

        if (fragMol.getAtomCount() <= 1)
        {
            continue;
        }

        // Start every fragment with an empty SMILES. If the conversion below throws
        // before assigning its out parameter, the child must not inherit the SMILES
        // left over from the previously processed fragment.
        string fragSmiles = "";

        try
        {
            fragMol = CdkMol.AtomContainerToSmilesAndBack(fragMol, out fragSmiles);
        }
        catch (Exception)
        {
            AtomContainerToSmilesAndBackErrorCount++; // just count error and carry on with the unconverted fragment
        }

        ig = igf.getInChIGenerator(fragMol);
        if (!IsAcceptableInchiStatus(ig))
        {
            continue;
        }

        string childInChIString = ig.getInchi();
        string childInChIKey = ig.getInchiKey();
        string childFIKHB = UniChemUtil.GetFIKHB(childInChIKey);

        fragMol = CdkMol.InChIToAtomContainer(childInChIString); // convert from inchi
        fragMol = CdkMol.RemoveIsotopesStereoExplicitHydrogens(fragMol); // additional normalization for fingerprint

        fp = // generate a fingerprint for the fragment
            CdkFingerprint.BuildBitSetFingerprint(fragMol, FingerprintType.MACCS, -1, -1);

        bool isDuplicateChild = false; // see if a dup child
        for (int ci = 0; ci < icd.Children.Count; ci++)
        {
            if (icd.Children[ci].ChildFIKHB == childFIKHB)
            {
                isDuplicateChild = true;
                break;
            }
        }

        if (isDuplicateChild)
        {
            continue; // skip if dup
        }

        UniChemFIKHBHierarchy icdChild = new UniChemFIKHBHierarchy();
        icdChild.ParentFIKHB = icd.GetFIKHB();
        icdChild.ChildFIKHB = childFIKHB;
        icdChild.InChIString = childInChIString;
        icdChild.CanonSmiles = fragSmiles;
        icdChild.Fingerprint = fp;

        icd.Children.Add(icdChild);
    }

    return icd;
}