Exemplo n.º 1
0
        public void Merge(ICountFingerprint fp)
        {
            // Folds the bins of fp into this fingerprint: counts of equal hashes are added,
            // and the stored hash/count arrays are rebuilt in ascending hash order.
            var countsByHash = new Dictionary<int, int>();

            // Seed the table with the bins currently held by this instance.
            // Dictionary.Add throws if hitHashes contains the same hash twice.
            for (int own = 0; own < hitHashes.Length; own++)
            {
                countsByHash.Add(hitHashes[own], numOfHits[own]);
            }

            // Add every populated bin of the other fingerprint on top.
            for (int other = 0; other < fp.GetNumberOfPopulatedBins(); other++)
            {
                int hash = fp.GetHash(other);

                // TryGetValue leaves 'existing' at 0 when the hash is not present yet.
                countsByHash.TryGetValue(hash, out int existing);
                countsByHash[hash] = existing + fp.GetCount(other);
            }

            // Rebuild the parallel arrays ordered by hash value.
            var orderedHashes = new List<int>(countsByHash.Keys);
            orderedHashes.Sort();

            var orderedCounts = new int[orderedHashes.Count];
            for (int pos = 0; pos < orderedCounts.Length; pos++)
            {
                orderedCounts[pos] = countsByHash[orderedHashes[pos]];
            }

            hitHashes = orderedHashes.ToArray();
            numOfHits = orderedCounts;
        }
Exemplo n.º 2
0
        /// <summary>
        /// Calculates Tanimoto distance for two count fingerprints using method 2:
        /// the sum of per-hash minimum counts divided by the sum of per-hash maximum counts.
        /// </summary>
        /// <remarks>
        /// <token>cdk-cite-Grant06</token>.
        /// <para>
        /// Both fingerprints are walked in parallel, always advancing the side with the smaller hash.
        /// NOTE(review): this presumably requires the bins of each fingerprint to be ordered by
        /// ascending hash - confirm against the <see cref="ICountFingerprint"/> implementations.
        /// </para>
        /// <para>
        /// When neither fingerprint has a populated bin the result is 0/0, i.e. <see cref="double.NaN"/>.
        /// </para>
        /// </remarks>
        /// <param name="fp1">count fingerprint 1</param>
        /// <param name="fp2">count fingerprint 2</param>
        /// <returns>the ratio of the summed minima to the summed maxima</returns>
        public static double Method2(ICountFingerprint fp1, ICountFingerprint fp2)
        {
            long sumOfMinima = 0;
            long sumOfMaxima = 0;
            int pos1 = 0;
            int pos2 = 0;

            for (;;)
            {
                bool has1 = pos1 < fp1.GetNumberOfPopulatedBins();
                bool has2 = pos2 < fp2.GetNumberOfPopulatedBins();
                if (!has1 && !has2)
                {
                    break;
                }

                // Only one side has bins left: they count towards the maxima only.
                if (!has2)
                {
                    sumOfMaxima += fp1.GetCount(pos1);
                    pos1++;
                    continue;
                }
                if (!has1)
                {
                    sumOfMaxima += fp2.GetCount(pos2);
                    pos2++;
                    continue;
                }

                int hash1 = fp1.GetHash(pos1);
                int hash2 = fp2.GetHash(pos2);
                int count1 = fp1.GetCount(pos1);
                int count2 = fp2.GetCount(pos2);

                if (hash1 < hash2)
                {
                    // Hash present only in fp1 (so far): minimum is zero.
                    sumOfMaxima += count1;
                    pos1++;
                }
                else if (hash1 > hash2)
                {
                    // Hash present only in fp2 (so far): minimum is zero.
                    sumOfMaxima += count2;
                    pos2++;
                }
                else
                {
                    // Shared hash: contributes to both sums.
                    sumOfMaxima += Math.Max(count1, count2);
                    sumOfMinima += Math.Min(count1, count2);
                    pos1++;
                    pos2++;
                }
            }

            return (double)sumOfMinima / sumOfMaxima;
        }
Exemplo n.º 3
0
        public void TestGetCountFingerprint()
        {
            // The atom-pair count fingerprint of the parsed SMILES is expected to populate 9 bins.
            var fingerprinter = new AtomPairs2DFingerprinter();
            var molecule = parser.ParseSmiles("cccccccccc");

            ICountFingerprint countFingerprint = fingerprinter.GetCountFingerprint(molecule);
            var populatedBins = countFingerprint.GetNumberOfPopulatedBins();

            Assert.AreEqual(9, populatedBins);
        }
        public override void TestGetCountFingerprint()
        {
            // Checks that the signature fingerprinter yields a non-null count fingerprint
            // whose reported length is not zero.
            var fingerprinter = new SignatureFingerprinter(0);
            var sp = CDK.SmilesParser;
            var mol = sp.ParseSmiles("O(NC)CC");
            ICountFingerprint countFp = fingerprinter.GetCountFingerprint(mol);

            Assert.IsNotNull(countFp);

            // AreNotSame compares object references; with two value-type arguments each is boxed
            // into a fresh object, so that assertion could never fail. Compare the values instead.
            Assert.AreNotEqual(0L, countFp.Length);
        }
Exemplo n.º 5
0
        public override void TestGetCountFingerprint()
        {
            // Each lingo below is looked up by the hash code of its text and must have
            // the count at the same index of expectedCounts.
            string[] lingos = { "cccc", "Oc0c", "c0cc", "0ccc", "ccc0" };
            int[] expectedCounts = { 2, 1, 1, 1, 1 };

            var fingerprinter = new LingoFingerprinter(4);
            var smilesParser = CDK.SmilesParser;
            var phenol = smilesParser.ParseSmiles("Oc1ccccc1");
            ICountFingerprint countFingerprint = fingerprinter.GetCountFingerprint(phenol);

            for (int k = 0; k < lingos.Length; k++)
            {
                Assert.AreEqual(expectedCounts[k], countFingerprint.GetCountForHash(lingos[k].GetHashCode()));
            }
        }
        public void TestCompaRingBitFingerprintAndCountBehavingAsBit()
        {
            // Count fingerprints switched to bit behaviour must give the same Tanimoto value
            // (within the tolerance) as real bit fingerprints, for both count-based methods.
            const double tolerance = 0.001;

            var triazole = TestMoleculeFactory.Make123Triazole();
            var imidazole = TestMoleculeFactory.MakeImidazole();
            var signatures = new SignatureFingerprinter(1);

            ICountFingerprint triazoleCounts = signatures.GetCountFingerprint(triazole);
            ICountFingerprint imidazoleCounts = signatures.GetCountFingerprint(imidazole);
            triazoleCounts.SetBehaveAsBitFingerprint(true);
            imidazoleCounts.SetBehaveAsBitFingerprint(true);

            var triazoleBits = signatures.GetBitFingerprint(triazole);
            var imidazoleBits = signatures.GetBitFingerprint(imidazole);

            var fromBits = Tanimoto.Calculate(triazoleBits, imidazoleBits);
            var fromCountsMethod1 = Tanimoto.Method1(triazoleCounts, imidazoleCounts);
            var fromCountsMethod2 = Tanimoto.Method2(triazoleCounts, imidazoleCounts);

            Assert.AreEqual(fromCountsMethod1, fromCountsMethod2, tolerance);
            Assert.AreEqual(fromBits, fromCountsMethod1, tolerance);
        }
Exemplo n.º 7
0
        /// <summary>
        /// Calculates Tanimoto distance for two count fingerprints using method 1:
        /// <c>xy / (x + y - xy)</c>, where <c>xy</c> is the sum of count products over matching
        /// hashes and <c>x</c>, <c>y</c> are the sums of squared counts of each fingerprint.
        /// </summary>
        /// <remarks>
        /// The feature/count type fingerprints may be of different length.
        /// Uses Tanimoto method from <token>cdk-cite-Steffen09</token>.
        /// <para>
        /// Every bin of <paramref name="fp1"/> is compared against every bin of
        /// <paramref name="fp2"/>, so no particular bin order is required.
        /// When the denominator is zero (for example both fingerprints have no populated bins)
        /// the result is <see cref="double.NaN"/>.
        /// </para>
        /// </remarks>
        /// <param name="fp1">count fingerprint 1</param>
        /// <param name="fp2">count fingerprint 2</param>
        /// <returns>a Tanimoto distance</returns>
        public static double Method1(ICountFingerprint fp1, ICountFingerprint fp2)
        {
            long xy = 0, x = 0, y = 0;

            for (int i = 0; i < fp1.GetNumberOfPopulatedBins(); i++)
            {
                int hash = fp1.GetHash(i);

                // Widen to long BEFORE multiplying: an int * int product wraps around silently
                // for large counts, even though the accumulator itself is a long.
                long count1 = fp1.GetCount(i);

                for (int j = 0; j < fp2.GetNumberOfPopulatedBins(); j++)
                {
                    if (hash == fp2.GetHash(j))
                    {
                        xy += count1 * fp2.GetCount(j);
                    }
                }
                x += count1 * count1;
            }
            for (int j = 0; j < fp2.GetNumberOfPopulatedBins(); j++)
            {
                long count2 = fp2.GetCount(j);
                y += count2 * count2;
            }
            return (double)xy / (x + y - xy);
        }
Exemplo n.º 8
0
        /// <summary>
        /// Ad-hoc debugging routine: reads the molecules of a hard-coded local file and, for each
        /// one, computes the count fingerprint, the individual circular fingerprints and the folded
        /// bit fingerprint. Nothing is returned or asserted; the locals exist for inspection only.
        /// </summary>
        public static void BuildTest()
        {
            int fpLen = 2048;                                                 // folded binary fp length
            CircularFingerprinterClass fpClass = CircularFingerprinterClass.ECFP4; // FP diameter (debug setting)

            CircularFingerprinter cfp = new CircularFingerprinter(fpClass, fpLen);

            // 'using' guarantees the file handle is released even if reading or fingerprinting throws.
            using (StreamReader reader = new StreamReader(@"C:\Download\CorpId-12345.mol"))
            {
                EnumerableSDFReader rdr = new EnumerableSDFReader(reader.BaseStream, ChemObjectBuilder.Instance);
                rdr.ReaderMode = ChemObjectReaderMode.Relaxed;

                using (IEnumerator<IAtomContainer> cursor = rdr.GetEnumerator())
                {
                    while (cursor.MoveNext())
                    {
                        IAtomContainer mol = cursor.Current;

                        mol = CdkMol.GetLargestMoleculeFragment(mol);

                        ICountFingerprint cfp1 = cfp.GetCountFingerprint(mol); // get hash values and counts for each

                        cfp.Calculate(mol);
                        int fpCount = cfp.FPCount;
                        for (int fpi = 0; fpi < fpCount; fpi++)
                        {
                            CircularFingerprint cfp2 = cfp.GetFP(fpi); // gets hash, iteration and lists of atoms (dups appear multiple times)
                        }

                        IBitFingerprint bfp    = cfp.GetBitFingerprint(mol);
                        BitArray        bs     = bfp.AsBitSet();
                        int             bsCard = bfp.Cardinality;
                        long            bsSize = bfp.Length;
                    }
                }
            }
        }
Exemplo n.º 9
0
 public void Merge(ICountFingerprint fp)
 {
     // No-op: the body is empty, so the supplied fingerprint is ignored and no state changes.
     // NOTE(review): confirm that merging is deliberately unsupported by this implementation.
 }
Exemplo n.º 10
0
 /// <summary>
 /// Computes the continuous Tanimoto coefficient of two feature/count fingerprints.
 /// </summary>
 /// <remarks>
 /// The two fingerprints do not need to have the same length.
 /// This overload simply forwards to <see cref="Method2(ICountFingerprint, ICountFingerprint)"/>.
 /// NOTE(review): earlier documentation cited 10.1021/ci800326z as the source of the method;
 /// confirm that this reference matches the Method2 implementation being delegated to.
 /// </remarks>
 /// <param name="fp1">The first fingerprint</param>
 /// <param name="fp2">The second fingerprint</param>
 /// <returns>The Tanimoto coefficient</returns>
 /// <seealso cref="Method1(ICountFingerprint, ICountFingerprint)"/>
 /// <seealso cref="Method2(ICountFingerprint, ICountFingerprint)"/>
 public static double Calculate(ICountFingerprint fp1, ICountFingerprint fp2)
     => Method2(fp1, fp2);