public string GetFIKHB() { return(UniChemUtil.GetFIKHB(InChIKey)); } // First InChI Key Hash Block (Connectivity, no stereo, Isomers)
static INativeMolMx CdkMolUtil => StaticCdkMol.I; // static molecule shortcut for utility methods /// <summary> /// Build UniChem Data /// Note that this routine takes about 14 secs the first time it's called, faster thereafter. /// </summary> /// <param name="mol"></param> /// <returns></returns> public static UniChemData BuildUniChemData( IAtomContainer mol) { IAtomContainer mol2; InChIGenerator ig = null; string molSmiles, fragSmiles = "", molFile = ""; int acc, fi, ci; //if (Lex.StartsWith(molString, "InChI=")) //{ // sourceInchi = molString; // mol = CdkMol.InChIToAtomContainer(sourceInchi); //} //else // assume molfile otherwise //{ // molfile = molString; // mol = CdkMol.MolfileToAtomContainer(molfile); //} if (mol.Atoms.Count <= 1) { throw new Exception("Atom count <= 1"); } int pseudoCount = CdkMol.RemovePseudoAtoms(mol); mol = CdkMol.AtomContainerToSmilesAndBack(mol, out molSmiles); InChIGeneratorFactory igf = InChIGeneratorFactory.Instance; try { //string options = "/KET /15T"; // options to include keto-enol and 1,5-tautomerism (not recognized by CDK) ig = igf.GetInChIGenerator(mol); //, options); } catch (Exception ex) // may fail for some complex mols (e.g. CorpId 12345, a MDL V3000 mol with a CF3 Sgroup/alias) { throw new Exception(ex.Message, ex); } //{ // try // try to simplify the molfile so that InChI can handle it // { // //if (Lex.IsUndefined(molfile)) throw ex; // string molfile2 = SimplifyMolfileForInChIGeneration(molile); // mol = CdkMol.MolfileToAtomContainer(molfile2); // mol = CdkMol.AtomContainerToSmilesAndBack(mol, out molSmiles); // ig = igf.getInChIGenerator(mol); // } // catch (Exception ex2) // { // throw new Exception(ex2.Message, ex); // } //} if (!IsAcceptableInchiStatus(ig)) { string errMsg = "InChI generation " + ig.ReturnStatus + ": " + ig.Message; molFile = CdkMol.AtomContainerToMolfile(mol); // debug throw new Exception(errMsg); } // Populate the UniChem object UniChemData icd = new UniChemData(); //icd.Molfile = molfile; icd.AtomContainer = mol; icd.InChIString = ig.InChI; icd.InChIKey = ig.GetInChIKey(); icd.CanonSmiles = molSmiles; // Build and store fingerprint mol = CdkMol.InChIToAtomContainer(icd.InChIString); //int hydRemovedCnt = CdkMol.RemoveHydrogensBondedToPositiveNitrogens(mol); mol = CdkMol.RemoveIsotopesStereoExplicitHydrogens(mol); // additional normalization for fingerprint BitSetFingerprint fp = // generate a fingerprint CdkFingerprint.BuildBitSetFingerprint(mol, FingerprintType.MACCS, -1, -1); icd.Fingerprint = fp; if (ConnectivityChecker.IsConnected(mol)) { return(icd); // single fragment } //string mf = CdkMol.GetMolecularFormula(mol); //AtomContainerSet acs = (AtomContainerSet)ConnectivityChecker.partitionIntoMolecules(mol); List <IAtomContainer> frags = CdkMol.FragmentMolecule(mol, true); // get fragments filtering out small and common frags for (fi = 0; fi < frags.Count; fi++) { mol2 = frags[fi]; int atomCnt = mol2.Atoms.Count; if (atomCnt <= 1) { continue; } try { mol2 = CdkMol.AtomContainerToSmilesAndBack(mol2, out fragSmiles); } catch (Exception ex) { AtomContainerToSmilesAndBackErrorCount++; // just count error and ignore } ig = igf.GetInChIGenerator(mol2); if (!IsAcceptableInchiStatus(ig)) { continue; } string childInChIString = ig.InChI; string childInChIKey = ig.GetInChIKey(); string childFIKHB = UniChemUtil.GetFIKHB(childInChIKey); mol2 = CdkMol.InChIToAtomContainer(childInChIString); // convert from inchi mol2 = CdkMol.RemoveIsotopesStereoExplicitHydrogens(mol2); // additional normalization for fingerprint fp = // generate a fingerprint for the fragment CdkFingerprint.BuildBitSetFingerprint(mol2, FingerprintType.MACCS, -1, -1); for (ci = 0; ci < icd.Children.Count; ci++) // see if a dup child { if (icd.Children[ci].ChildFIKHB == childFIKHB) { break; } } if (ci < icd.Children.Count) { continue; // skip if dup } UniChemFIKHBHierarchy icdChild = new UniChemFIKHBHierarchy(); icdChild.ParentFIKHB = icd.GetFIKHB(); icdChild.ChildFIKHB = childFIKHB; icdChild.InChIString = childInChIString; icdChild.CanonSmiles = fragSmiles; icdChild.Fingerprint = fp; icd.Children.Add(icdChild); } return(icd); }
/// <summary> /// BuildFingerprints /// </summary> public static void BuildFingerprints( UniChemData icd) { IAtomContainer mol, mol2; InChIGenerator ig = null; int acc, aci, ci; icd.Children.Clear(); string parentFIKHB = icd.GetFIKHB(); DateTime t0 = DateTime.Now; mol = CdkMol.InChIToAtomContainer(icd.InChIString); icd.CanonSmiles = CdkMol.AtomContainerToSmiles(mol); InChIToAtomContainerTime += TimeOfDay.Delta(ref t0); BitSetFingerprint fp = // generate a fingerprint CdkFingerprint.BuildBitSetFingerprint(mol, FingerprintType.MACCS, -1, -1); BuildFinterprintTime1 += TimeOfDay.Delta(ref t0); icd.Fingerprint = fp; if (ConnectivityChecker.IsConnected(mol)) { return; // single fragment } InChIGeneratorFactory igf = InChIGeneratorFactory.Instance; AtomContainerSet acs = (AtomContainerSet)ConnectivityChecker.PartitionIntoMolecules(mol); PartitionIntoMoleculesTime += TimeOfDay.Delta(ref t0); acc = acs.Count; for (aci = 0; aci < acc; aci++) { mol2 = acs[aci]; GetAtomContainerTime += TimeOfDay.Delta(ref t0); ig = igf.GetInChIGenerator(mol2); if (!IsAcceptableInchiStatus(ig)) { continue; } string childKey = ig.GetInChIKey(); string childFIKHB = UniChemUtil.GetFIKHB(childKey); InChIGeneratorTime += TimeOfDay.Delta(ref t0); fp = // generate a fingerprint for the fragment CdkFingerprint.BuildBitSetFingerprint(mol2, FingerprintType.MACCS, -1, -1); BuildFinterprintTime2 += TimeOfDay.Delta(ref t0); for (ci = 0; ci < icd.Children.Count; ci++) // see if a dup child { if (icd.Children[ci].ChildFIKHB == childFIKHB) { break; } } if (ci < icd.Children.Count) { continue; // skip if dup } UniChemFIKHBHierarchy fikhbHier = new UniChemFIKHBHierarchy(); fikhbHier.ParentFIKHB = parentFIKHB; fikhbHier.ChildFIKHB = childFIKHB; fikhbHier.CanonSmiles = CdkMol.AtomContainerToSmiles(mol2); fikhbHier.Fingerprint = fp; icd.Children.Add(fikhbHier); } return; }