/// <summary>
/// Derives the pruning bounds for a similarity search from a query fingerprint.
/// This is currently a stub: the bounds are computed, but the search that would use
/// them exists only as the commented-out reference code below, so the method has
/// no result and no visible effect.
/// </summary>
/// <param name="qfp">Query fingerprint; its on-bits and cardinality are read</param>
/// <param name="dbfp">Database fingerprint (not used by the current implementation)</param>
/// <param name="threshold">Similarity threshold; per the reference code a hit must score at least this value</param>
public void PruneFingerprint(
    FingerprintMx qfp,
    FingerprintMx dbfp,
    double threshold)
{
    int[] queryOnBits = qfp.OnBits;
    int queryBitCount = qfp.Cardinality; // number of bits set in the query fingerprint

    // Smallest and largest number of set bits a result fingerprint may have
    int minResultBits = (int)Math.Ceiling(queryBitCount * threshold);
    int maxResultBits = (int)(queryBitCount / threshold);

    // Number of query bits of which at least one must be shared with any result
    int requiredBitCount = queryBitCount - minResultBits + 1;

    // Reference code for the search that remains to be implemented
    // (qn, qmin, qmax and ncommon correspond to the four values computed above).
    //
    // Get list of bits where at least one must be in result fp. Use least popular bits if possible.
    //
    //   if (db.mfp_counts)
    //   {
    //       reqbits = [count['_id'] for count in db.mfp_counts.find({ '_id': { '$in': qfp} }).sort('count', 1).limit(ncommon)];
    //   }
    //   else
    //   {
    //       reqbits = qfp[:ncommon];
    //   }
    //
    //   results = [];
    //   for (fp in db.molecules.find({ 'mfp.bits': { '$in': reqbits}, 'mfp.count': { '$gte': qmin, '$lte': qmax} }))
    //   {
    //       intersection = len(set(qfp) & set(fp['mfp']['bits']));
    //       pn = fp['mfp']['count'];
    //       tanimoto = float(intersection) / (pn + qn - intersection);
    //       if (tanimoto >= threshold)
    //       {
    //           results.append((tanimoto, fp['chembl_id'], fp['smiles']));
    //       }
    //   }
    //
    //   return results;
}
/// <summary>
/// Calculates the Jaccard similarity between this fingerprint and another FingerprintMx.
/// Both fingerprints are converted to bool arrays and passed to the array-based overload.
/// </summary>
/// <param name="fp2">Fingerprint to compare with this instance</param>
/// <returns>The value returned by the bool-array overload of CalculateJaccardSimilarity</returns>
public double CalculateJaccardSimilarity(FingerprintMx fp2)
{
    bool[] thisBits = ToBoolArray();
    bool[] otherBits = fp2.ToBoolArray();

    return CalculateJaccardSimilarity(thisBits, otherBits);
}
// Python reference pseudo-code for the fingerprint pruning search:
//
// # Get list of bits where at least one must be in result fp. Use least popular bits if possible.
// if db.mfp_counts:
//     reqbits = [count['_id'] for count in db.mfp_counts.find({'_id': {'$in': qfp}}).sort('count', 1).limit(ncommon)]
// else:
//     reqbits = qfp[:ncommon]
// results = []
// for fp in db.molecules.find({'mfp.bits': {'$in': reqbits}, 'mfp.count': {'$gte': qmin, '$lte': qmax}}):
//     intersection = len(set(qfp) & set(fp['mfp']['bits']))
//     pn = fp['mfp']['count']
//     tanimoto = float(intersection) / (pn + qn - intersection)
//     if tanimoto >= threshold:
//         results.append((tanimoto, fp['chembl_id'], fp['smiles']))
// return results

/// <summary>
/// Test to calculate similarity between pairs of structures (Alt syntax: Call SimSearchMx.Test [cid1] [cid2])
/// Tautomers:
/// Isotopes:
/// Neg Counterion, (Cl-) with 2 quaternary N+ with H attached in main frag:
/// Neg Counterion, (I-) with quaternary N+ (no attached H) in main frag:
/// Pos Counterion (Li+) with O- in main frag:
/// Benzene, cyclohexane:
/// StereoIsomers:
/// StereoIsomers:
///
/// Builds the fingerprint list for each of the two compounds, scores every pairing of
/// a fingerprint from the first compound with a fingerprint from the second, and writes
/// the scores as a tab-delimited grid (rows = first compound, columns = second compound)
/// to a text document named "SimilarityScores" which is then opened.
/// </summary>
/// <param name="args">Two compound ids separated by one or more spaces</param>
/// <returns>Always an empty string; the results are delivered through the text document</returns>
/// <exception cref="ArgumentException">Thrown when fewer than two compound ids are supplied</exception>
public static string Test(string args)
{
    const int smiLen = 40; // width every grid cell is padded to

    // Tolerate null input and repeated/leading/trailing spaces, and fail with a usage
    // message instead of an index error when an id is missing.
    string[] sa = (args ?? "").Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
    if (sa.Length < 2)
    {
        throw new ArgumentException(
            "Two compound ids are required. Syntax: SimSearchMx.Test [cid1] [cid2]", "args");
    }

    string cid1 = CompoundId.Normalize(sa[0]);
    string cid2 = CompoundId.Normalize(sa[1]);

    MoleculeMx s1 = MoleculeMx.SelectMoleculeForCid(cid1);
    IAtomContainer mol = CdkMol.MolfileToAtomContainer(s1.GetMolfileString());
    UniChemData ucd1 = UniChemUtil.BuildUniChemData(mol);
    List<FingerprintMx> fps1 = UniChemDataToFingerprintMxList(ucd1);

    MoleculeMx s2 = MoleculeMx.SelectMoleculeForCid(cid2);
    IAtomContainer mol2 = CdkMol.MolfileToAtomContainer(s2.GetMolfileString());
    UniChemData ucd2 = UniChemUtil.BuildUniChemData(mol2);
    List<FingerprintMx> fps2 = UniChemDataToFingerprintMxList(ucd2);

    // Header row: first cell holds the two ids as entered, followed by one column per cid2 fingerprint
    string fps2Smiles = (sa[0] + " / " + sa[1]).PadRight(smiLen);
    string scores = "";

    for (int i1 = 0; i1 < fps1.Count; i1++)
    {
        FingerprintMx fp1 = fps1[i1];

        for (int i2 = 0; i2 < fps2.Count; i2++)
        {
            FingerprintMx fp2 = fps2[i2];

            if (i1 == 0) // build smiles headers of cid2 frags if first cid1 frag
            {
                fps2Smiles += "\t" + fp2.CanonSmiles.PadRight(smiLen);
            }

            if (i2 == 0) // include smiles at start of each line
            {
                scores += "\r\n" + fp1.CanonSmiles.PadRight(smiLen);
            }

            float simScore = CalculateFingerprintPairSimilarityScore(fp1, fp2);
            scores += "\t" + string.Format("{0:0.00}", simScore).PadRight(smiLen);
        }
    }

    scores = fps2Smiles + scores;
    FileUtil.WriteAndOpenTextDocument("SimilarityScores", scores);

    return "";
}
/// <summary>
/// Calculates the similarity score between a pair of FingerprintMxs as
/// (bits set in both) / (bits set in fp1 + bits set in fp2 - bits set in both).
/// When neither fingerprint has a bit set, the division is 0 / 0 and the result is float.NaN.
/// </summary>
/// <param name="fp1">First fingerprint</param>
/// <param name="fp2">Second fingerprint</param>
/// <returns>Ratio of shared on-bits to the combined on-bits of the two fingerprints</returns>
public static float CalculateFingerprintPairSimilarityScore(
    FingerprintMx fp1,
    FingerprintMx fp2)
{
    long[] firstWords = fp1.ToLongArray();
    OpenBitSet firstBits = new OpenBitSet(firstWords, firstWords.Length);
    int firstCount = (int)firstBits.Cardinality();

    long[] secondWords = fp2.ToLongArray();
    OpenBitSet secondBits = new OpenBitSet(secondWords, secondWords.Length);
    int secondCount = (int)secondBits.Cardinality();

    // Intersect modifies secondBits in place, so its cardinality must be captured above first.
    // NOTE(review): if OpenBitSet wraps the supplied array rather than copying it, and
    // ToLongArray exposes fp2's internal storage, this would also alter fp2 - confirm.
    secondBits.Intersect(firstBits);
    int sharedCount = (int)secondBits.Cardinality();

    int unionCount = firstCount + secondCount - sharedCount;

    return (float)sharedCount / unionCount;
}
/// <summary>
/// Converts a UniChemData object into a list of FingerprintMx objects.
/// The first entry carries the fingerprint and canonical smiles of the UniChemData
/// itself; one further entry follows for each of its children, in enumeration order.
/// </summary>
/// <param name="ucd">UniChemData supplying the parent and child fingerprints</param>
/// <returns>FingerprintMx list with the parent entry first</returns>
public static List<FingerprintMx> UniChemDataToFingerprintMxList(UniChemData ucd)
{
    List<FingerprintMx> result = new List<FingerprintMx>();

    // Entry for the UniChemData itself
    FingerprintMx parentFp = new FingerprintMx
    {
        CdkFp = ucd.Fingerprint,
        CanonSmiles = ucd.CanonSmiles
    };
    result.Add(parentFp);

    // One entry per child
    foreach (UniChemFIKHBHierarchy child in ucd.Children)
    {
        FingerprintMx childFp = new FingerprintMx
        {
            CdkFp = child.Fingerprint,
            CanonSmiles = child.CanonSmiles
        };
        result.Add(childFp);
    }

    return result;
}