// # Get list of bits where at least one must be in result fp. Use least popular bits if possible. // if db.mfp_counts: // reqbits = [count['_id'] for count in db.mfp_counts.find({'_id': {'$in': qfp // } //}).sort('count', 1).limit(ncommon)] // else: // reqbits = qfp[:ncommon] // results = [] // for fp in db.molecules.find({'mfp.bits': {'$in': reqbits}, 'mfp.count': {'$gte': qmin, '$lte': qmax}}): // intersection = len(set(qfp) & set(fp['mfp']['bits'])) // pn = fp['mfp']['count'] // tanimoto = float(intersection) / (pn + qn - intersection) // if tanimoto >= threshold: // results.append((tanimoto, fp['chembl_id'], fp['smiles'])) // return results /// <summary> /// Test to calculate similarity between pairs of structures (Alt syntax: Call SimSearchMx.Test [cid1] [cid2] /// Tautomers: /// Isotopes: /// Neg Counterion, (Cl-) with 2 quatternary N+ with H attached in main frag: /// Neg Counterion, (I-) with quatternary N+ (no attached H) in main frag: /// Pos Counterion (Li+) with O- in main frag: /// Benzene, cyclohexane: /// StereoIsomers: /// StereoIsomers: /// </summary> /// <param name="args"></param> /// <returns></returns> public static string Test(string args) { string[] sa = args.Split(' '); string cid1 = CompoundId.Normalize(sa[0]); string cid2 = CompoundId.Normalize(sa[1]); int smiLen = 40; MoleculeMx s1 = MoleculeMx.SelectMoleculeForCid(cid1); IAtomContainer mol = CdkMol.MolfileToAtomContainer(s1.GetMolfileString()); UniChemData ucd1 = UniChemUtil.BuildUniChemData(mol); List <FingerprintMx> fps1 = UniChemDataToFingerprintMxList(ucd1); MoleculeMx s2 = MoleculeMx.SelectMoleculeForCid(cid2); IAtomContainer mol2 = CdkMol.MolfileToAtomContainer(s2.GetMolfileString()); UniChemData ucd2 = UniChemUtil.BuildUniChemData(mol2); List <FingerprintMx> fps2 = UniChemDataToFingerprintMxList(ucd2); string fps2Smiles = (sa[0] + " / " + sa[1]).PadRight(smiLen); string scores = ""; for (int i1 = 0; i1 < fps1.Count; i1++) { FingerprintMx fp1 = fps1[i1]; for (int i2 = 0; i2 < fps2.Count; i2++) { FingerprintMx fp2 = fps2[i2]; if (i1 == 0) // build smiles headers of cid2 frags if first cid1 frag { fps2Smiles += "\t" + fp2.CanonSmiles.PadRight(smiLen); } if (i2 == 0) { scores += "\r\n" + fp1.CanonSmiles.PadRight(smiLen); // include smiles at start of each line } float simScore = CalculateFingerprintPairSimilarityScore(fp1, fp2); scores += "\t" + string.Format("{0:0.00}", simScore).PadRight(smiLen); } } scores = fps2Smiles + scores; FileUtil.WriteAndOpenTextDocument("SimilarityScores", scores); return(""); }
/// <summary> /// UniChemDataToFingerprintMxList /// </summary> /// <param name="ucd"></param> /// <returns>FingerprintMx list </returns> public static List <FingerprintMx> UniChemDataToFingerprintMxList(UniChemData ucd) { List <FingerprintMx> fps = new List <FingerprintMx>(); FingerprintMx fp = new FingerprintMx(); fp.CdkFp = ucd.Fingerprint; fp.CanonSmiles = ucd.CanonSmiles; fps.Add(fp); foreach (UniChemFIKHBHierarchy child in ucd.Children) { fp = new FingerprintMx(); fp.CdkFp = child.Fingerprint; fp.CanonSmiles = child.CanonSmiles; fps.Add(fp); } return(fps); }