예제 #1
0
        /// <summary>
        /// Work-in-progress port of a fingerprint pruning search. At present the method only
        /// derives the pruning bounds from the query fingerprint; it performs no search,
        /// never reads <paramref name="dbfp"/> and produces no result.
        /// </summary>
        /// <param name="qfp">Query fingerprint; its OnBits and Cardinality are read.</param>
        /// <param name="dbfp">Database fingerprint (currently unused).</param>
        /// <param name="threshold">Minimum similarity a result must reach; used as a multiplier and as a divisor of the query bit count.</param>
        public void PruneFingerprint(
            FingerprintMx qfp,
            FingerprintMx dbfp,
            double threshold)
        {
            int[] queryOnBits   = qfp.OnBits;
            int   queryBitCount = qfp.Cardinality; // number of bits in the query fingerprint

            // Bit-count window a result fingerprint must fall into
            int minResultBits = (int)Math.Ceiling(queryBitCount * threshold); // fewest bits a result may have
            int maxResultBits = (int)(queryBitCount / threshold);             // most bits a result may have

            // Size of the query-bit subset of which at least one bit must be shared with a result
            int requiredBitCount = queryBitCount - minResultBits + 1;

            // Remaining steps of the reference algorithm, not yet ported (pseudo-code):
            //
            // Get the list of bits of which at least one must be in a result fp; use the least popular bits if possible.
            //
            //   if (db.mfp_counts)
            //       reqbits = [count['_id'] for count in db.mfp_counts.find({ '_id': { '$in': qfp } }).sort('count', 1).limit(ncommon)];
            //   else
            //       reqbits = qfp[:ncommon];
            //
            //   results = [];
            //   for (fp in db.molecules.find({ 'mfp.bits': { '$in': reqbits }, 'mfp.count': { '$gte': qmin, '$lte': qmax } }))
            //   {
            //       intersection = len(set(qfp) & set(fp['mfp']['bits']));
            //       pn = fp['mfp']['count'];
            //       tanimoto = float(intersection) / (pn + qn - intersection);
            //       if (tanimoto >= threshold)
            //           results.append((tanimoto, fp['chembl_id'], fp['smiles']));
            //   }
            //   return results;
        }
예제 #2
0
        /// <summary>
        /// Computes the Jaccard similarity between this fingerprint and another one by
        /// converting both to bool arrays and delegating to the bool-array overload.
        /// </summary>
        /// <param name="fp2">Fingerprint to compare this instance against.</param>
        /// <returns>The value returned by the bool-array overload of CalculateJaccardSimilarity.</returns>
        public double CalculateJaccardSimilarity(FingerprintMx fp2)
        {
            // Arguments are evaluated left to right: this instance is converted first, then fp2
            return CalculateJaccardSimilarity(ToBoolArray(), fp2.ToBoolArray());
        }
예제 #3
0
        //    # Get list of bits where at least one must be in result fp. Use least popular bits if possible.
        //    if db.mfp_counts:
        //        reqbits = [count['_id'] for count in db.mfp_counts.find({'_id': {'$in': qfp}}).sort('count', 1).limit(ncommon)]
        //    else:
        //        reqbits = qfp[:ncommon]
        //    results = []
        //    for fp in db.molecules.find({'mfp.bits': {'$in': reqbits}, 'mfp.count': {'$gte': qmin, '$lte': qmax}}):
        //        intersection = len(set(qfp) & set(fp['mfp']['bits']))
        //        pn = fp['mfp']['count']
        //        tanimoto = float(intersection) / (pn + qn - intersection)
        //        if tanimoto >= threshold:
        //            results.append((tanimoto, fp['chembl_id'], fp['smiles']))
        //    return results


        /// <summary>
        /// Diagnostic command that scores every fingerprint of one compound against every
        /// fingerprint of another and opens the resulting tab-separated table as a text document.
        /// </summary>
        /// <remarks>
        /// Alternate syntax: <c>Call SimSearchMx.Test [cid1] [cid2]</c>.
        /// Structure-pair categories listed by the original author as worth checking
        /// (no example ids were recorded with them):
        ///  - Tautomers
        ///  - Isotopes
        ///  - Negative counterion (Cl-) with 2 quaternary N+ with H attached in the main fragment
        ///  - Negative counterion (I-) with quaternary N+ (no attached H) in the main fragment
        ///  - Positive counterion (Li+) with O- in the main fragment
        ///  - Benzene, cyclohexane
        ///  - Stereoisomers
        /// </remarks>
        /// <param name="args">Two compound ids separated by one or more spaces.</param>
        /// <returns>Always an empty string; the score table is delivered through the opened document.</returns>
        /// <exception cref="System.ArgumentException">Fewer than two compound ids were supplied.</exception>

        public static string Test(string args)
        {
            // Drop empty tokens so repeated or leading spaces do not yield blank compound ids
            string[] sa = (args ?? "").Split(new char[] { ' ' }, System.StringSplitOptions.RemoveEmptyEntries);
            if (sa.Length < 2)
            {
                throw new System.ArgumentException("Two compound ids separated by a space are required", "args");
            }

            string cid1 = CompoundId.Normalize(sa[0]);
            string cid2 = CompoundId.Normalize(sa[1]);

            int smiLen = 40; // column width every cell is padded to

            MoleculeMx           s1   = MoleculeMx.SelectMoleculeForCid(cid1);
            IAtomContainer       mol  = CdkMol.MolfileToAtomContainer(s1.GetMolfileString());
            UniChemData          ucd1 = UniChemUtil.BuildUniChemData(mol);
            List <FingerprintMx> fps1 = UniChemDataToFingerprintMxList(ucd1);

            MoleculeMx           s2   = MoleculeMx.SelectMoleculeForCid(cid2);
            IAtomContainer       mol2 = CdkMol.MolfileToAtomContainer(s2.GetMolfileString());
            UniChemData          ucd2 = UniChemUtil.BuildUniChemData(mol2);
            List <FingerprintMx> fps2 = UniChemDataToFingerprintMxList(ucd2);

            // Header line: the id pair followed by one column per cid2 fingerprint.
            // Builders avoid re-copying the growing text on every cell.
            System.Text.StringBuilder header = new System.Text.StringBuilder((sa[0] + " / " + sa[1]).PadRight(smiLen));
            System.Text.StringBuilder rows   = new System.Text.StringBuilder();

            for (int i1 = 0; i1 < fps1.Count; i1++)
            {
                FingerprintMx fp1 = fps1[i1];
                for (int i2 = 0; i2 < fps2.Count; i2++)
                {
                    FingerprintMx fp2 = fps2[i2];

                    if (i1 == 0) // build smiles headers of cid2 frags while on the first cid1 frag
                    {
                        header.Append("\t").Append(fp2.CanonSmiles.PadRight(smiLen));
                    }

                    if (i2 == 0) // start a new row labelled with the cid1 frag smiles
                    {
                        rows.Append("\r\n").Append(fp1.CanonSmiles.PadRight(smiLen));
                    }

                    float simScore = CalculateFingerprintPairSimilarityScore(fp1, fp2);
                    rows.Append("\t").Append(string.Format("{0:0.00}", simScore).PadRight(smiLen));
                }
            }

            FileUtil.WriteAndOpenTextDocument("SimilarityScores", header.ToString() + rows.ToString());

            return("");
        }
예제 #4
0
        /// <summary>
        /// Scores a pair of fingerprints as (bits set in both) / (bits set in either),
        /// i.e. common / (count1 + count2 - common).
        /// </summary>
        /// <param name="fp1">First fingerprint.</param>
        /// <param name="fp2">Second fingerprint.</param>
        /// <returns>
        /// The ratio as a float. When neither fingerprint has any bit set the division is 0 / 0
        /// in floating point and the result is NaN.
        /// </returns>

        public static float CalculateFingerprintPairSimilarityScore(
            FingerprintMx fp1,
            FingerprintMx fp2)
        {
            long[]     firstWords = fp1.ToLongArray();
            OpenBitSet firstBits  = new OpenBitSet(firstWords, firstWords.Length);

            long[]     secondWords = fp2.ToLongArray();
            OpenBitSet secondBits  = new OpenBitSet(secondWords, secondWords.Length);

            // Both counts must be taken before the intersection overwrites secondBits
            int firstCount  = (int)firstBits.Cardinality();
            int secondCount = (int)secondBits.Cardinality();

            // NOTE(review): Intersect modifies secondBits in place; whether that also alters fp2
            // depends on whether ToLongArray hands out a copy - TODO confirm
            secondBits.Intersect(firstBits);
            int sharedCount = (int)secondBits.Cardinality();

            int unionCount = firstCount + secondCount - sharedCount;

            return sharedCount / (float)unionCount;
        }
예제 #5
0
        /// <summary>
        /// Flattens a UniChemData object into a list of FingerprintMx objects: first an entry
        /// built from the object itself, then one entry per element of its Children, in order.
        /// Each entry carries the source's Fingerprint (as CdkFp) and CanonSmiles.
        /// </summary>
        /// <param name="ucd">Source data; its Fingerprint, CanonSmiles and Children are read.</param>
        /// <returns>A new list holding 1 + (number of children) fingerprints.</returns>

        public static List <FingerprintMx> UniChemDataToFingerprintMxList(UniChemData ucd)
        {
            List<FingerprintMx> result = new List<FingerprintMx>();

            // Entry for the top-level object always comes first
            result.Add(new FingerprintMx
            {
                CdkFp       = ucd.Fingerprint,
                CanonSmiles = ucd.CanonSmiles
            });

            foreach (UniChemFIKHBHierarchy node in ucd.Children)
            {
                result.Add(new FingerprintMx
                {
                    CdkFp       = node.Fingerprint,
                    CanonSmiles = node.CanonSmiles
                });
            }

            return result;
        }