/// <summary> /// ExecuteSearch /// </summary> /// <param name="queryMolfile"></param> /// <param name="databases"></param> /// <param name="minimumSimilarity"></param> /// <param name="maxHits"></param> /// <returns></returns> public List <StructSearchMatch> ExecuteSearch( string queryMolfile, string databases, FingerprintType fpType, double minimumSimilarity, int maxHits) { int fragCnt; IAtomContainer query = CdkMol.MolfileToAtomContainer(queryMolfile); IAtomContainer largestFrag = CdkMol.GetLargestMoleculeFragment(query, out fragCnt); GetCorpSim = Lex.Contains(databases, "corp"); GetChemblSim = Lex.Contains(databases, "chembl"); if (!GetCorpSim && !GetChemblSim) { throw new Exception("Invalid database: " + databases); } FingerprintType = fpType; if (FingerprintType != FingerprintType.MACCS && FingerprintType != FingerprintType.Circular) { throw new Exception("Invalid FingerprintType: " + FingerprintType); } MinimumSimilarity = minimumSimilarity; MaxHits = maxHits; List <StructSearchMatch> matches = ExecuteSearch(largestFrag); return(matches); }
// # Get list of bits where at least one must be in result fp. Use least popular bits if possible. // if db.mfp_counts: // reqbits = [count['_id'] for count in db.mfp_counts.find({'_id': {'$in': qfp // } //}).sort('count', 1).limit(ncommon)] // else: // reqbits = qfp[:ncommon] // results = [] // for fp in db.molecules.find({'mfp.bits': {'$in': reqbits}, 'mfp.count': {'$gte': qmin, '$lte': qmax}}): // intersection = len(set(qfp) & set(fp['mfp']['bits'])) // pn = fp['mfp']['count'] // tanimoto = float(intersection) / (pn + qn - intersection) // if tanimoto >= threshold: // results.append((tanimoto, fp['chembl_id'], fp['smiles'])) // return results /// <summary> /// Test to calculate similarity between pairs of structures (Alt syntax: Call SimSearchMx.Test [cid1] [cid2] /// Tautomers: /// Isotopes: /// Neg Counterion, (Cl-) with 2 quatternary N+ with H attached in main frag: /// Neg Counterion, (I-) with quatternary N+ (no attached H) in main frag: /// Pos Counterion (Li+) with O- in main frag: /// Benzene, cyclohexane: /// StereoIsomers: /// StereoIsomers: /// </summary> /// <param name="args"></param> /// <returns></returns> public static string Test(string args) { string[] sa = args.Split(' '); string cid1 = CompoundId.Normalize(sa[0]); string cid2 = CompoundId.Normalize(sa[1]); int smiLen = 40; MoleculeMx s1 = MoleculeMx.SelectMoleculeForCid(cid1); IAtomContainer mol = CdkMol.MolfileToAtomContainer(s1.GetMolfileString()); UniChemData ucd1 = UniChemUtil.BuildUniChemData(mol); List <FingerprintMx> fps1 = UniChemDataToFingerprintMxList(ucd1); MoleculeMx s2 = MoleculeMx.SelectMoleculeForCid(cid2); IAtomContainer mol2 = CdkMol.MolfileToAtomContainer(s2.GetMolfileString()); UniChemData ucd2 = UniChemUtil.BuildUniChemData(mol2); List <FingerprintMx> fps2 = UniChemDataToFingerprintMxList(ucd2); string fps2Smiles = (sa[0] + " / " + sa[1]).PadRight(smiLen); string scores = ""; for (int i1 = 0; i1 < fps1.Count; i1++) { FingerprintMx fp1 = fps1[i1]; for (int i2 = 0; i2 < fps2.Count; i2++) { FingerprintMx fp2 = fps2[i2]; if (i1 == 0) // build smiles headers of cid2 frags if first cid1 frag { fps2Smiles += "\t" + fp2.CanonSmiles.PadRight(smiLen); } if (i2 == 0) { scores += "\r\n" + fp1.CanonSmiles.PadRight(smiLen); // include smiles at start of each line } float simScore = CalculateFingerprintPairSimilarityScore(fp1, fp2); scores += "\t" + string.Format("{0:0.00}", simScore).PadRight(smiLen); } } scores = fps2Smiles + scores; FileUtil.WriteAndOpenTextDocument("SimilarityScores", scores); return(""); }