/// <summary>
/// Merges the populated bins of <paramref name="fp"/> into this fingerprint.
/// </summary>
/// <remarks>
/// Counts for hashes that occur in both fingerprints are added together. Afterwards
/// <c>hitHashes</c> holds every hash in ascending order and <c>numOfHits</c> holds the
/// count for the hash at the same index.
/// </remarks>
/// <param name="fp">the fingerprint whose bins are added to this one</param>
public void Merge(ICountFingerprint fp)
{
    var merged = new Dictionary<int, int>();

    // Seed the table with this fingerprint's own bins.
    // Dictionary.Add throws if the same hash is stored twice.
    for (int idx = 0; idx < hitHashes.Length; idx++)
    {
        merged.Add(hitHashes[idx], numOfHits[idx]);
    }

    // Fold in the other fingerprint, summing counts on shared hashes.
    for (int bin = 0; bin < fp.GetNumberOfPopulatedBins(); bin++)
    {
        int hash = fp.GetHash(bin);
        if (!merged.TryGetValue(hash, out int existing))
        {
            existing = 0;
        }
        merged[hash] = existing + fp.GetCount(bin);
    }

    var orderedHashes = new List<int>(merged.Keys);
    orderedHashes.Sort();

    // Build the parallel arrays first, then publish them to the fields.
    var sortedHashes = new int[orderedHashes.Count];
    var sortedCounts = new int[orderedHashes.Count];
    for (int pos = 0; pos < orderedHashes.Count; pos++)
    {
        int hash = orderedHashes[pos];
        sortedHashes[pos] = hash;
        sortedCounts[pos] = merged[hash];
    }

    hitHashes = sortedHashes;
    numOfHits = sortedCounts;
}
/// <summary>
/// Calculates the Tanimoto value for two count fingerprints using method 2.
/// </summary>
/// <remarks>
/// <token>cdk-cite-Grant06</token>.
/// <para>
/// Both fingerprints are walked in parallel by bin index. A hash found on only one side
/// contributes its count to the maximum sum; a hash found on both sides contributes the
/// larger count to the maximum sum and the smaller count to the minimum sum.
/// </para>
/// <para>
/// NOTE(review): the parallel walk only pairs up equal hashes if <c>GetHash</c> yields
/// hashes in ascending order for both inputs - confirm against the implementations.
/// </para>
/// </remarks>
/// <param name="fp1">count fingerprint 1</param>
/// <param name="fp2">count fingerprint 2</param>
/// <returns>
/// the minimum sum divided by the maximum sum; <see cref="double.NaN"/> when the maximum
/// sum is zero (for example when neither fingerprint has a populated bin)
/// </returns>
public static double Method2(ICountFingerprint fp1, ICountFingerprint fp2)
{
    long sumOfMaxima = 0;
    long sumOfMinima = 0;
    int left = 0;
    int right = 0;

    while (true)
    {
        bool leftAvailable = left < fp1.GetNumberOfPopulatedBins();
        bool rightAvailable = right < fp2.GetNumberOfPopulatedBins();
        if (!leftAvailable && !rightAvailable)
        {
            break;
        }

        // Second fingerprint exhausted: the remaining bins of the first are unmatched.
        if (!rightAvailable)
        {
            sumOfMaxima += fp1.GetCount(left);
            left++;
            continue;
        }

        // First fingerprint exhausted: the remaining bins of the second are unmatched.
        if (!leftAvailable)
        {
            sumOfMaxima += fp2.GetCount(right);
            right++;
            continue;
        }

        int leftHash = fp1.GetHash(left);
        int rightHash = fp2.GetHash(right);

        if (leftHash < rightHash)
        {
            sumOfMaxima += fp1.GetCount(left);
            left++;
        }
        else if (leftHash > rightHash)
        {
            sumOfMaxima += fp2.GetCount(right);
            right++;
        }
        else
        {
            int leftCount = fp1.GetCount(left);
            int rightCount = fp2.GetCount(right);
            sumOfMaxima += Math.Max(leftCount, rightCount);
            sumOfMinima += Math.Min(leftCount, rightCount);
            left++;
            right++;
        }
    }

    return (double)sumOfMinima / sumOfMaxima;
}
/// <summary>
/// Checks that the count fingerprint produced by <c>AtomPairs2DFingerprinter</c> for the
/// SMILES <c>cccccccccc</c> reports nine populated bins.
/// </summary>
public void TestGetCountFingerprint()
{
    var fingerprinter = new AtomPairs2DFingerprinter();
    var molecule = parser.ParseSmiles("cccccccccc");

    ICountFingerprint counts = fingerprinter.GetCountFingerprint(molecule);
    var populatedBins = counts.GetNumberOfPopulatedBins();

    Assert.AreEqual(9, populatedBins);
}
/// <summary>
/// Checks that <c>SignatureFingerprinter</c> (constructed with 0) returns a non-null count
/// fingerprint with a non-zero length for the SMILES <c>O(NC)CC</c>.
/// </summary>
public override void TestGetCountFingerprint()
{
    var fingerprinter = new SignatureFingerprinter(0);
    var sp = CDK.SmilesParser;
    var mol = sp.ParseSmiles("O(NC)CC");

    ICountFingerprint countFp = fingerprinter.GetCountFingerprint(mol);

    Assert.IsNotNull(countFp);
    // Compare by value. AreNotSame tests reference identity, and two separately boxed
    // numbers are never the same object, so that check could not fail.
    Assert.AreNotEqual(0L, countFp.Length);
}
/// <summary>
/// Checks the per-hash counts that <c>LingoFingerprinter</c> (constructed with 4) reports
/// for the SMILES <c>Oc1ccccc1</c>.
/// </summary>
public override void TestGetCountFingerprint()
{
    var fingerprinter = new LingoFingerprinter(4);
    var molecule = CDK.SmilesParser.ParseSmiles("Oc1ccccc1");
    ICountFingerprint counts = fingerprinter.GetCountFingerprint(molecule);

    // Looks up the count stored under the hash code of the given string.
    void ExpectCount(int expected, string lingo)
    {
        Assert.AreEqual(expected, counts.GetCountForHash(lingo.GetHashCode()));
    }

    ExpectCount(2, "cccc");
    ExpectCount(1, "Oc0c");
    ExpectCount(1, "c0cc");
    ExpectCount(1, "0ccc");
    ExpectCount(1, "ccc0");
}
/// <summary>
/// Checks that count fingerprints switched to bit-fingerprint behaviour give the same
/// Tanimoto value (within 0.001) as the corresponding bit fingerprints, and that
/// <c>Tanimoto.Method1</c> and <c>Tanimoto.Method2</c> agree with each other.
/// </summary>
public void TestCompaRingBitFingerprintAndCountBehavingAsBit()
{
    const double tolerance = 0.001;

    var triazole = TestMoleculeFactory.Make123Triazole();
    var imidazole = TestMoleculeFactory.MakeImidazole();
    var signatures = new SignatureFingerprinter(1);

    // Count fingerprints, forced to behave like bit fingerprints.
    ICountFingerprint triazoleCounts = signatures.GetCountFingerprint(triazole);
    ICountFingerprint imidazoleCounts = signatures.GetCountFingerprint(imidazole);
    triazoleCounts.SetBehaveAsBitFingerprint(true);
    imidazoleCounts.SetBehaveAsBitFingerprint(true);

    // Bit fingerprints of the same molecules.
    var triazoleBits = signatures.GetBitFingerprint(triazole);
    var imidazoleBits = signatures.GetBitFingerprint(imidazole);

    var fromBits = Tanimoto.Calculate(triazoleBits, imidazoleBits);
    var fromCountsMethod1 = Tanimoto.Method1(triazoleCounts, imidazoleCounts);
    var fromCountsMethod2 = Tanimoto.Method2(triazoleCounts, imidazoleCounts);

    Assert.AreEqual(fromCountsMethod1, fromCountsMethod2, tolerance);
    Assert.AreEqual(fromBits, fromCountsMethod1, tolerance);
}
/// <summary>
/// Calculates the Tanimoto value for two count fingerprints using method 1.
/// </summary>
/// <remarks>
/// The feature/count type fingerprints may be of different length.
/// Uses Tanimoto method from <token>cdk-cite-Steffen09</token>.
/// <para>
/// With <c>xy</c> the sum of count products over hashes present in both fingerprints, and
/// <c>x</c> and <c>y</c> the sums of squared counts of each fingerprint, the result is
/// <c>xy / (x + y - xy)</c>.
/// </para>
/// </remarks>
/// <param name="fp1">count fingerprint 1</param>
/// <param name="fp2">count fingerprint 2</param>
/// <returns>
/// <c>xy / (x + y - xy)</c>; <see cref="double.NaN"/> when all three sums are zero
/// (for example when neither fingerprint has a populated bin)
/// </returns>
public static double Method1(ICountFingerprint fp1, ICountFingerprint fp2)
{
    long xy = 0, x = 0, y = 0;

    // The bin counts do not change during the computation, so read them once.
    var bins1 = fp1.GetNumberOfPopulatedBins();
    var bins2 = fp2.GetNumberOfPopulatedBins();

    for (int i = 0; i < bins1; i++)
    {
        int hash = fp1.GetHash(i);
        // Widen to long BEFORE multiplying: an int * int product silently wraps
        // around once the counts get large, corrupting the sums.
        long count1 = fp1.GetCount(i);

        for (int j = 0; j < bins2; j++)
        {
            if (hash == fp2.GetHash(j))
            {
                xy += count1 * fp2.GetCount(j);
            }
        }

        x += count1 * count1;
    }

    for (int j = 0; j < bins2; j++)
    {
        long count2 = fp2.GetCount(j);
        y += count2 * count2;
    }

    return (double)xy / (x + y - xy);
}
/// <summary>
/// Debugging harness: reads the structures in a hard-coded file and, for each one, computes
/// the circular count fingerprint, the individual circular fingerprints and the folded bit
/// fingerprint.
/// </summary>
/// <remarks>
/// Nothing is returned or stored; the per-molecule locals exist so the values can be
/// inspected in a debugger. The input path is fixed at <c>C:\Download\CorpId-12345.mol</c>.
/// </remarks>
public static void BuildTest()
{
    CircularFingerprinterClass fpClass = CircularFingerprinterClass.ECFP6; // FP diameter
    int fpLen = 2048; // folded binary fp length

    fpClass = CircularFingerprinterClass.ECFP4; // debug

    CircularFingerprinter cfp = new CircularFingerprinter(fpClass, fpLen);

    // 'using' guarantees the file handle and the enumerator are released even when
    // parsing or fingerprinting throws part-way through the file.
    using (StreamReader reader = new StreamReader(@"C:\Download\CorpId-12345.mol"))
    {
        EnumerableSDFReader rdr = new EnumerableSDFReader(reader.BaseStream, ChemObjectBuilder.Instance);
        rdr.ReaderMode = ChemObjectReaderMode.Relaxed;

        using (IEnumerator<IAtomContainer> cursor = rdr.GetEnumerator())
        {
            while (cursor.MoveNext())
            {
                IAtomContainer mol = cursor.Current;
                mol = CdkMol.GetLargestMoleculeFragment(mol);

                ICountFingerprint cfp1 = cfp.GetCountFingerprint(mol); // get hash values and counts for each

                cfp.Calculate(mol);
                int fpCount = cfp.FPCount;

                for (int fpi = 0; fpi < fpCount; fpi++)
                {
                    CircularFingerprint cfp2 = cfp.GetFP(fpi); // gets hash, iteration and lists of atoms (dups appear multiple times)
                }

                IBitFingerprint bfp = cfp.GetBitFingerprint(mol);
                BitArray bs = bfp.AsBitSet();
                int bsCard = bfp.Cardinality;
                long bsSize = bfp.Length;
            }
        }
    }
}
/// <summary>
/// Does nothing: the supplied fingerprint is not read and this instance is left unchanged.
/// </summary>
/// <param name="fp">the fingerprint passed for merging; ignored by this implementation</param>
// NOTE(review): presumably merging is not supported by this implementation - confirm that a
// silent no-op (rather than an exception) is what callers expect.
public void Merge(ICountFingerprint fp) { }
/// <summary>
/// Evaluates the continuous Tanimoto coefficient of two feature/count fingerprints.
/// </summary>
/// <remarks>
/// The two fingerprints may be of different length. The computation is delegated
/// unchanged to <see cref="Method2(ICountFingerprint, ICountFingerprint)"/>.
/// <para>
/// NOTE(review): this method has been documented as using the Tanimoto method from
/// 10.1021/ci800326z, yet it forwards to <c>Method2</c> - confirm which reference applies.
/// </para>
/// </remarks>
/// <param name="fp1">The first fingerprint</param>
/// <param name="fp2">The second fingerprint</param>
/// <returns>The Tanimoto coefficient as computed by <c>Method2</c></returns>
/// <seealso cref="Method1(ICountFingerprint, ICountFingerprint)"/>
/// <seealso cref="Method2(ICountFingerprint, ICountFingerprint)"/>
public static double Calculate(ICountFingerprint fp1, ICountFingerprint fp2)
    => Method2(fp1, fp2);