/// <summary>
/// Remember the supplied query molecule for later FSS (full structure search) matching.
/// </summary>
/// <param name="queryMol">Query molecule; stored as a CdkMol, or as null if it is not a CdkMol</param>
/// <param name="FullStructureSearchType">Optional search type; not used by this method</param>
public void SetFSSQueryMolecule(
    INativeMolMx queryMol,
    string FullStructureSearchType = null)
{
    // "as" yields null when queryMol is null or is not a CdkMol instance
    CdkMol cdkQuery = queryMol as CdkMol;
    FSSQueryMolecule = cdkQuery;
}
/// <summary>
/// Map and hilight a substructure match
/// </summary>
/// <param name="molfile">Molfile of the structure to be matched and hilighted</param>
/// <returns>
/// The input molfile, unchanged. Hilighting of a successful match is not implemented,
/// so the input is returned whether or not a mapping is found or an exception occurs.
/// </returns>
public string HilightSSSMatch(string molfile)
{
    int queryIndex;
    int[] mappedAtoms, mappedBonds;

    try
    {
        CdkMol m = new CdkMol(MoleculeFormat.Molfile, molfile);

        // The mapping is still computed so any effects of GetSSSMapping are retained.
        // Previously a successful match threw NotImplementedException, which was caught
        // immediately below; that made every match pay for a throw/catch (very slow
        // under a debugger) only to return the input anyway.
        GetSSSMapping(m, out queryIndex, out mappedAtoms, out mappedBonds);

        // TODO: apply mappedAtoms/mappedBonds hilighting once implemented
        return molfile;
    }
    catch (Exception)
    {
        return molfile; // just return the input if exception encountered
    }
}
/// <summary>
/// GetMolFormulaDotDisconnect - parse a molfile and return its molecular formula
/// as produced by the CdkMol-based overload of this method.
/// </summary>
/// <param name="molfile">Molfile text of the molecule</param>
/// <returns>Formula string returned by the CdkMol overload</returns>
public string GetMolFormulaDotDisconnect(
    string molfile)
{
    CdkMol parsedMol = new CdkMol(MoleculeFormat.Molfile, molfile);
    return GetMolFormulaDotDisconnect(parsedMol);
}
/// <summary>
/// Get mol weight for a molfile
/// </summary>
/// <param name="molfile">Molfile text of the molecule</param>
/// <returns>The MolWeight value of a CdkMol built from the molfile</returns>
public double GetMolWeight(
    string molfile)
{
    return new CdkMol(MoleculeFormat.Molfile, molfile).MolWeight;
}
/// <summary>
/// SetMoleculeAndRender - store the molecule format and string, rebuild the
/// CdkMol from the stored values and then render it.
/// </summary>
/// <param name="format">Format of the molecule string</param>
/// <param name="value">Molecule string in the given format</param>
public void SetMoleculeAndRender(MoleculeFormat format, string value)
{
    // Record the new definition first; the molecule is built from the stored values
    MolFormat = format;
    MolString = value;

    CdkMol = new CdkMol(MolFormat, MolString);

    RenderMolecule();
}
/// <summary>
/// Perform a full structure search
/// </summary>
/// <param name="query">Query molecule; must be a CdkMol with a native molecule</param>
/// <param name="target">Target molecule; must be a CdkMol with a native molecule</param>
/// <param name="FullStructureSearchType">Optional search type; not used by this method</param>
/// <returns>
/// True if the isomorphism tester reports the two native molecules as isomorphic;
/// false if they are not, or if either argument is null, is not a CdkMol or has
/// no native molecule.
/// </returns>
public bool FullStructureMatch(
    ICdkMol query,
    ICdkMol target,
    string FullStructureSearchType = null)
{
    CdkMol q = query as CdkMol;
    CdkMol t = target as CdkMol;

    // "as" yields null for null or non-CdkMol arguments; treat that as "no match"
    // rather than failing with a NullReferenceException below
    if (q == null || t == null)
    {
        return false;
    }

    var queryNative = q.NativeMol;
    var targetNative = t.NativeMol;
    if (queryNative == null || targetNative == null)
    {
        return false;
    }

    var fs = new UniversalIsomorphismTester();
    return fs.IsIsomorph(queryNative, targetNative);
}
/// <summary>
/// BuildTestFp - test helper that reads a molfile from a fixed download folder
/// and parses it into an atom container. Fingerprint generation is currently
/// commented out, so the result is always null.
/// </summary>
/// <param name="corpId">If greater than zero, replaces "12345" in the default file name</param>
/// <returns>Always null while the fingerprint code is disabled</returns>
public static bool[] BuildTestFp(int corpId)
{
    string path = @"C:\Download\CorpId-12345.mol";
    if (corpId > 0)
    {
        path = path.Replace("12345", corpId.ToString());
    }

    string molfile = FileUtil.ReadFile(path);

    // The molfile is still parsed even though the parsed molecule is not used yet
    CdkMol.MolfileToAtomContainer(molfile);

    // Disabled fingerprint generation:
    //int fpClass = CircularFingerprinterClass.ECFP6; // FP diameter
    //int fpLen = 2048; // folded binary fp length
    //ba = CdkFingerprint.BuildBoolArray(mol, fpClass, fpLen, true);

    return null;
}
/// <summary>
/// Perform a full structure search
/// </summary>
/// <param name="query">Query molecule; must be a CdkMol</param>
/// <param name="target">Target molecule; must be a CdkMol</param>
/// <param name="FullStructureSearchType">Optional search type; not used by this method</param>
/// <returns>
/// True if the isomorphism tester reports the two native molecules as isomorphic;
/// false if they are not, or if either argument is null, is not a CdkMol or has
/// no native molecule after being updated.
/// </returns>
public bool FullStructureMatch(
    INativeMolMx query,
    INativeMolMx target,
    string FullStructureSearchType = null)
{
    if (query == null || target == null)
    {
        return false;
    }

    // Check each cast result BEFORE calling into it; previously UpdateNativeMolecule
    // was invoked first, so a non-CdkMol argument threw a NullReferenceException
    CdkMol q = query as CdkMol;
    if (q == null)
    {
        return false;
    }

    q.UpdateNativeMolecule(); // be sure up to date
    if (q.NativeMol == null)
    {
        return false;
    }

    CdkMol t = target as CdkMol;
    if (t == null)
    {
        return false;
    }

    t.UpdateNativeMolecule(); // be sure up to date
    if (t.NativeMol == null)
    {
        return false;
    }

    var fs = new UniversalIsomorphismTester();
    return fs.IsIsomorph(q.NativeMol, t.NativeMol);
}
static INativeMolMx CdkMolUtil => StaticCdkMol.I; // static molecule shortcut for utility methods

/// <summary>
/// Build UniChem Data
/// Note that this routine takes about 14 secs the first time it's called, faster thereafter.
///
/// Steps, in order:
///  1. Remove pseudo atoms and round-trip the molecule through SMILES.
///  2. Generate the InChI string and key for the whole molecule.
///  3. Rebuild the molecule from the InChI, normalize it and build a MACCS fingerprint.
///  4. If the molecule is not connected, repeat the InChI/fingerprint generation for
///     each fragment and add one child entry per distinct fragment FIKHB.
/// </summary>
/// <param name="mol">Molecule to process; must contain more than one atom</param>
/// <returns>Populated UniChemData including child entries for multi-fragment molecules</returns>
/// <exception cref="Exception">
/// Thrown if the atom count is one or less, if the InChI generator cannot be created,
/// or if the InChI status for the whole molecule is not acceptable.
/// </exception>
public static UniChemData BuildUniChemData(
    IAtomContainer mol)
{
    IAtomContainer mol2;
    InChIGenerator ig = null;
    string molSmiles, molFile = "";
    int fi, ci;

    if (mol.Atoms.Count <= 1)
    {
        throw new Exception("Atom count <= 1");
    }

    CdkMol.RemovePseudoAtoms(mol); // return value is not needed here

    mol = CdkMol.AtomContainerToSmilesAndBack(mol, out molSmiles);

    InChIGeneratorFactory igf = InChIGeneratorFactory.Instance;

    try
    {
        // Options to include keto-enol and 1,5-tautomerism ("/KET /15T") are not recognized by CDK
        ig = igf.GetInChIGenerator(mol);
    }
    catch (Exception ex) // may fail for some complex mols (e.g. CorpId 12345, a MDL V3000 mol with a CF3 Sgroup/alias)
    {
        throw new Exception(ex.Message, ex);
    }

    if (!IsAcceptableInchiStatus(ig))
    {
        string errMsg = "InChI generation " + ig.ReturnStatus + ": " + ig.Message;
        molFile = CdkMol.AtomContainerToMolfile(mol); // debug: available for inspection in the debugger
        throw new Exception(errMsg);
    }

    // Populate the UniChem object

    UniChemData icd = new UniChemData();
    icd.AtomContainer = mol;
    icd.InChIString = ig.InChI;
    icd.InChIKey = ig.GetInChIKey();
    icd.CanonSmiles = molSmiles;

    // Build and store fingerprint

    mol = CdkMol.InChIToAtomContainer(icd.InChIString);
    mol = CdkMol.RemoveIsotopesStereoExplicitHydrogens(mol); // additional normalization for fingerprint

    BitSetFingerprint fp = // generate a fingerprint
        CdkFingerprint.BuildBitSetFingerprint(mol, FingerprintType.MACCS, -1, -1);

    icd.Fingerprint = fp;

    if (ConnectivityChecker.IsConnected(mol))
    {
        return icd; // single fragment
    }

    List<IAtomContainer> frags = CdkMol.FragmentMolecule(mol, true); // get fragments filtering out small and common frags

    for (fi = 0; fi < frags.Count; fi++)
    {
        mol2 = frags[fi];

        if (mol2.Atoms.Count <= 1)
        {
            continue;
        }

        // Start each fragment with an empty SMILES. If the conversion below throws,
        // the child must not inherit the SMILES of a previously processed fragment.
        string fragSmiles = "";

        try
        {
            mol2 = CdkMol.AtomContainerToSmilesAndBack(mol2, out fragSmiles);
        }
        catch (Exception)
        {
            AtomContainerToSmilesAndBackErrorCount++; // just count error and continue with the unconverted fragment
        }

        ig = igf.GetInChIGenerator(mol2);
        if (!IsAcceptableInchiStatus(ig))
        {
            continue;
        }

        string childInChIString = ig.InChI;
        string childInChIKey = ig.GetInChIKey();
        string childFIKHB = UniChemUtil.GetFIKHB(childInChIKey);

        mol2 = CdkMol.InChIToAtomContainer(childInChIString); // convert from inchi
        mol2 = CdkMol.RemoveIsotopesStereoExplicitHydrogens(mol2); // additional normalization for fingerprint

        fp = // generate a fingerprint for the fragment
            CdkFingerprint.BuildBitSetFingerprint(mol2, FingerprintType.MACCS, -1, -1);

        for (ci = 0; ci < icd.Children.Count; ci++) // see if a dup child
        {
            if (icd.Children[ci].ChildFIKHB == childFIKHB)
            {
                break;
            }
        }

        if (ci < icd.Children.Count)
        {
            continue; // skip if dup
        }

        UniChemFIKHBHierarchy icdChild = new UniChemFIKHBHierarchy();
        icdChild.ParentFIKHB = icd.GetFIKHB();
        icdChild.ChildFIKHB = childFIKHB;
        icdChild.InChIString = childInChIString;
        icdChild.CanonSmiles = fragSmiles;
        icdChild.Fingerprint = fp;

        icd.Children.Add(icdChild);
    }

    return icd;
}
/// <summary>
/// BuildFingerprints - rebuild the canonical SMILES, the MACCS fingerprint and the
/// child fragment entries of a UniChemData object from its stored InChI string.
/// Elapsed time for each stage is added to the corresponding static timing counter.
/// </summary>
/// <param name="icd">Object to update; its existing children are cleared first</param>
public static void BuildFingerprints(
    UniChemData icd)
{
    icd.Children.Clear();
    string parentKey = icd.GetFIKHB();

    DateTime timer = DateTime.Now;

    IAtomContainer parentMol = CdkMol.InChIToAtomContainer(icd.InChIString);
    icd.CanonSmiles = CdkMol.AtomContainerToSmiles(parentMol);
    InChIToAtomContainerTime += TimeOfDay.Delta(ref timer);

    // Fingerprint for the whole molecule
    BitSetFingerprint parentFp =
        CdkFingerprint.BuildBitSetFingerprint(parentMol, FingerprintType.MACCS, -1, -1);
    BuildFinterprintTime1 += TimeOfDay.Delta(ref timer);
    icd.Fingerprint = parentFp;

    if (ConnectivityChecker.IsConnected(parentMol))
    {
        return; // single fragment, no children to build
    }

    InChIGeneratorFactory factory = InChIGeneratorFactory.Instance;

    AtomContainerSet fragments = (AtomContainerSet)ConnectivityChecker.PartitionIntoMolecules(parentMol);
    PartitionIntoMoleculesTime += TimeOfDay.Delta(ref timer);

    int fragmentCount = fragments.Count;
    for (int index = 0; index < fragmentCount; index++)
    {
        IAtomContainer fragment = fragments[index];
        GetAtomContainerTime += TimeOfDay.Delta(ref timer);

        InChIGenerator generator = factory.GetInChIGenerator(fragment);
        if (!IsAcceptableInchiStatus(generator))
        {
            continue;
        }

        string fragmentKey = generator.GetInChIKey();
        string fragmentFIKHB = UniChemUtil.GetFIKHB(fragmentKey);
        InChIGeneratorTime += TimeOfDay.Delta(ref timer);

        // Fingerprint for this fragment
        BitSetFingerprint fragmentFp =
            CdkFingerprint.BuildBitSetFingerprint(fragment, FingerprintType.MACCS, -1, -1);
        BuildFinterprintTime2 += TimeOfDay.Delta(ref timer);

        // Only one child is kept per distinct fragment FIKHB
        bool alreadyListed = false;
        for (int childIndex = 0; childIndex < icd.Children.Count; childIndex++)
        {
            if (icd.Children[childIndex].ChildFIKHB == fragmentFIKHB)
            {
                alreadyListed = true;
                break;
            }
        }

        if (alreadyListed)
        {
            continue;
        }

        icd.Children.Add(new UniChemFIKHBHierarchy
        {
            ParentFIKHB = parentKey,
            ChildFIKHB = fragmentFIKHB,
            CanonSmiles = CdkMol.AtomContainerToSmiles(fragment),
            Fingerprint = fragmentFp
        });
    }
}
/// <summary>
/// BuildTest - developer test routine that reads molecules from a hard-coded file
/// and runs the circular fingerprinter on the largest fragment of each one.
/// Nothing is returned or stored; the locals exist so the intermediate results
/// can be inspected in a debugger.
/// </summary>
public static void BuildTest()
{
    // Originally initialized to ECFP6 and then overridden to ECFP4 for debugging
    CircularFingerprinterClass fpClass = CircularFingerprinterClass.ECFP4; // FP diameter
    int fpLen = 2048; // folded binary fp length

    CircularFingerprinter cfp = new CircularFingerprinter(fpClass, fpLen);

    // The using block releases the file handle even if reading or fingerprinting
    // throws; previously the reader was only closed on the success path.
    using (StreamReader reader = new StreamReader(@"C:\Download\CorpId-12345.mol"))
    {
        EnumerableSDFReader rdr = new EnumerableSDFReader(reader.BaseStream, ChemObjectBuilder.Instance);
        rdr.ReaderMode = ChemObjectReaderMode.Relaxed;

        IEnumerator<IAtomContainer> cursor = rdr.GetEnumerator();
        while (cursor.MoveNext())
        {
            IAtomContainer mol = cursor.Current;
            mol = CdkMol.GetLargestMoleculeFragment(mol);

            ICountFingerprint cfp1 = cfp.GetCountFingerprint(mol); // get hash values and counts for each

            cfp.Calculate(mol);
            int fpCount = cfp.FPCount;
            for (int fpi = 0; fpi < fpCount; fpi++)
            {
                CircularFingerprint cfp2 = cfp.GetFP(fpi); // gets hash, iteration and lists of atoms (dups appear multiple times)
            }

            IBitFingerprint bfp = cfp.GetBitFingerprint(mol);
            BitArray bs = bfp.AsBitSet();
            int bsCard = bfp.Cardinality;
            long bsSize = bfp.Length;
        }
    }
}