/// <summary>
/// Fingerprints <c>c1ccccc1CCc1ccccc1</c> and <c>c1ccccc1CC</c> with the PubChem
/// fingerprinter, checks the reported fingerprint length (881) and asserts that
/// <c>FingerprinterTool.IsSubset</c> is false for the pair.
/// </summary>
public void TestFingerprint()
{
    IFingerprinter fingerprinter = new PubchemFingerprinter();
    var hydrogenAdder = CDK.HydrogenAdder;
    var diphenylethane = parser.ParseSmiles("c1ccccc1CCc1ccccc1");
    var ethylbenzene = parser.ParseSmiles("c1ccccc1CC");
    var molecules = new[] { diphenylethane, ethylbenzene };

    // Each preparation step is applied to both molecules before the next step starts.
    foreach (var molecule in molecules)
    {
        AtomContainerManipulator.PercieveAtomTypesAndConfigureAtoms(molecule);
    }
    foreach (var molecule in molecules)
    {
        hydrogenAdder.AddImplicitHydrogens(molecule);
    }
    foreach (var molecule in molecules)
    {
        AtomContainerManipulator.ConvertImplicitToExplicitHydrogens(molecule);
    }
    foreach (var molecule in molecules)
    {
        Aromaticity.CDKLegacy.Apply(molecule);
    }

    BitArray diphenylethaneBits = fingerprinter.GetBitFingerprint(diphenylethane).AsBitSet();
    BitArray ethylbenzeneBits = fingerprinter.GetBitFingerprint(ethylbenzene).AsBitSet();

    Assert.AreEqual(881, fingerprinter.Length);

    bool detectedAsSubset = FingerprinterTool.IsSubset(diphenylethaneBits, ethylbenzeneBits);
    Assert.IsFalse(detectedAsSubset, "c1ccccc1CC was detected as a subset of c1ccccc1CCc1ccccc1");
}
/// <summary>
/// Regression test for bug 934819: computes PubChem fingerprints for the two
/// structures supplied by <c>Bug934819_1()</c> and <c>Bug934819_2()</c> and compares
/// each against a fixed list of expected set-bit positions.
/// </summary>
public override void TestBug934819()
{
    IAtomContainer smaller = Bug934819_1();
    IAtomContainer larger = Bug934819_2();

    // The larger structure is prepared first, then the smaller one.
    AtomContainerManipulator.PercieveAtomTypesAndConfigureAtoms(larger);
    AtomContainerManipulator.PercieveAtomTypesAndConfigureAtoms(smaller);
    AddImplicitHydrogens(larger);
    AddImplicitHydrogens(smaller);

    IFingerprinter fingerprinter = new PubchemFingerprinter();
    IBitFingerprint largerFingerprint = fingerprinter.GetBitFingerprint(larger);
    IBitFingerprint smallerFingerprint = fingerprinter.GetBitFingerprint(smaller);

    // Expected bits for the structure returned by Bug934819_1().
    var expectedSmaller = AsBitSet(
        9, 10, 14, 18, 19, 33, 143, 146, 255, 256, 283, 284, 285, 293, 301, 332, 344, 349, 351,
        353, 355, 368, 370, 371, 376, 383, 384, 395, 401, 412, 416, 421, 423, 434, 441, 446, 449,
        454, 455, 464, 470, 471, 480, 489, 490, 500, 502, 507, 513, 514, 516, 520, 524, 531, 532,
        545, 546, 549, 552, 556, 558, 564, 570, 586, 592, 599, 600, 607, 633, 658, 665);
    Assert.IsTrue(BitArrays.Equals(expectedSmaller, smallerFingerprint.AsBitSet()));

    // Expected bits for the structure returned by Bug934819_2().
    var expectedLarger = AsBitSet(
        9, 10, 11, 14, 18, 19, 33, 34, 143, 146, 150, 153, 255, 256, 257, 258, 283, 284, 285, 293,
        301, 332, 344, 349, 351, 353, 355, 368, 370, 371, 374, 376, 383, 384, 395, 401, 412, 416,
        417, 421, 423, 427, 434, 441, 446, 449, 454, 455, 460, 464, 470, 471, 479, 480, 489, 490,
        500, 502, 507, 513, 514, 516, 520, 524, 531, 532, 545, 546, 549, 552, 556, 558, 564, 570,
        578, 582, 584, 586, 592, 595, 600, 603, 607, 608, 633, 634, 640, 658, 660, 664, 665, 668,
        677, 678, 683);
    Assert.IsTrue(BitArrays.Equals(expectedLarger, largerFingerprint.AsBitSet()));
}
/// <summary>
/// Usage sample: fingerprints an empty atom container with a
/// <c>PubchemFingerprinter</c> built on the silent chem-object builder and prints
/// the length of the resulting bit fingerprint.
/// </summary>
public static void Main()
{
    {
        #region
        var emptyMolecule = new AtomContainer();
        IFingerprinter pubchem = new PubchemFingerprinter(Silent.ChemObjectBuilder.Instance);
        IBitFingerprint bits = pubchem.GetBitFingerprint(emptyMolecule);
        Console.WriteLine(bits.Length); // returns 881
        #endregion
    }
}
/// <summary>
/// Decodes a fixed encoded PubChem fingerprint string and asserts that every bit
/// position in a known list is set in the decoded bit array.
/// </summary>
public void TestDecode()
{
    BitArray decoded = PubchemFingerprinter.Decode(
        "AAADcYBgAAAAAAAAAAAAAAAAAAAAAAAAAAAwAAAAAAAAAAABAAAAGAAAAAAACACAEAAwAIAAAACAACBCAAACAAAgAAAIiAAAAIgIICKAERCAIAAggAAIiAcAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA==");

    int[] expectedOn =
    {
        0, 9, 10, 178, 179, 255, 283, 284, 332, 344, 355, 370, 371, 384, 416, 434, 441, 446, 470,
        490, 516, 520, 524, 552, 556, 564, 570, 578, 582, 584, 595, 599, 603, 608, 618, 634, 640,
        660, 664, 668, 677, 678, 679
    };

    // Positions are checked in list order, so the first missing bit is the one reported.
    for (int i = 0; i < expectedOn.Length; i++)
    {
        int position = expectedOn[i];
        Assert.IsTrue(decoded[position], "bit " + position + " was not set");
    }
}
/// <summary>
/// Computes the PubChem fingerprint of <c>mol</c> and stores the resulting bit
/// array in <c>Result</c>.
/// </summary>
/// <remarks>
/// A <c>CDKException</c> raised while fingerprinting is written to standard error
/// and not rethrown; <c>Result</c> is then set to <c>null</c>.
/// </remarks>
public void Call()
{
    BitArray fp = null;
    IFingerprinter fpr = new PubchemFingerprinter();
    try
    {
        fp = fpr.GetBitFingerprint(mol).AsBitSet();
    }
    catch (CDKException e)
    {
        // Log the whole exception (type, message, inner exceptions and stack trace).
        // The stack trace alone does not say what actually went wrong.
        Console.Error.WriteLine(e);
    }
    Result = fp;
}
/// <summary>
/// Fingerprints benzene (<c>c1ccccc1</c>) after atom typing, hydrogen addition and
/// aromaticity perception, and compares the result with a reference fingerprint
/// decoded from its encoded string form.
/// </summary>
public void TestBenzene()
{
    const string smiles = "c1ccccc1";
    const string encodedReference =
        "AAADcYBgAAAAAAAAAAAAAAAAAAAAAAAAAAAwAAAAAAAAAAABAAAAGAAAAAAACACAEAAwAIAAAACAACBCAAACAAAgAAAIiAAAAIgIICKAERCAIAAggAAIiAcAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA==";

    var benzene = parser.ParseSmiles(smiles);
    AtomContainerManipulator.PercieveAtomTypesAndConfigureAtoms(benzene);
    CDK.HydrogenAdder.AddImplicitHydrogens(benzene);
    AtomContainerManipulator.ConvertImplicitToExplicitHydrogens(benzene);
    Aromaticity.CDKLegacy.Apply(benzene);

    IFingerprinter fingerprinter = new PubchemFingerprinter();
    BitArray actual = fingerprinter.GetBitFingerprint(benzene).AsBitSet();
    BitArray expected = PubchemFingerprinter.Decode(encodedReference);

    Assert.IsTrue(BitArrays.Equals(expected, actual));
}
/// <summary>
/// Fingerprints the structure given by the SMILES below (the test name refers to
/// CID 25181289) and compares the result with a reference fingerprint decoded
/// from its encoded string form.
/// </summary>
public void TestCID25181289()
{
    const string smiles = "C=C(C1=CC=C(C=C1)O)NNC2=C(C(=NC(=C2Cl)Cl)C(=O)O)Cl";
    const string encodedReference =
        "AAADccBzMAAGAAAAAAAAAAAAAAAAAAAAAAA8QAAAAAAAAAABwAAAHgIYCAAADA6BniAwzpJqEgCoAyTyTASChCAnJiIYumGmTtgKJnLD1/PEdQhkwBHY3Qe82AAOIAAAAAAAAABAAAAAAAAAAAAAAAAAAA==";

    var molecule = parser.ParseSmiles(smiles);
    AtomContainerManipulator.PercieveAtomTypesAndConfigureAtoms(molecule);
    CDK.HydrogenAdder.AddImplicitHydrogens(molecule);
    AtomContainerManipulator.ConvertImplicitToExplicitHydrogens(molecule);
    Aromaticity.CDKLegacy.Apply(molecule);

    IFingerprinter fingerprinter = new PubchemFingerprinter();
    BitArray actual = fingerprinter.GetBitFingerprint(molecule).AsBitSet();
    BitArray expected = PubchemFingerprinter.Decode(encodedReference);

    Assert.IsTrue(BitArrays.Equals(expected, actual));
}
/// <summary>
/// Fingerprints the charged structure given by the SMILES below (the test name
/// refers to CID 5934166) and compares the result with a reference fingerprint
/// decoded from its encoded string form.
/// </summary>
public void TestCID5934166()
{
    const string smiles = "C1=CC=C(C=C1)C[N+]2=C(C=C(C=C2C=CC3=CC=CC=C3)C4=CC=CC=C4)C5=CC=CC=C5";
    const string encodedReference =
        "AAADceB+AAAAAAAAAAAAAAAAAAAAAAAAAAA8YMGCAAAAAAAB1AAAHAAAAAAADAjBHgQwgJMMEACgAyRiRACCgCAhAiAI2CA4ZJgIIOLAkZGEIAhggADIyAcQgMAOgAAAAAAAAAAAAAAAAAAAAAAAAAAAAA==";

    var molecule = parser.ParseSmiles(smiles);
    AtomContainerManipulator.PercieveAtomTypesAndConfigureAtoms(molecule);
    CDK.HydrogenAdder.AddImplicitHydrogens(molecule);
    AtomContainerManipulator.ConvertImplicitToExplicitHydrogens(molecule);
    Aromaticity.CDKLegacy.Apply(molecule);

    IFingerprinter fingerprinter = new PubchemFingerprinter();
    BitArray actual = fingerprinter.GetBitFingerprint(molecule).AsBitSet();
    BitArray expected = PubchemFingerprinter.Decode(encodedReference);

    Assert.IsTrue(BitArrays.Equals(expected, actual));
}
/// <summary>
/// Fingerprints the structure given by the SMILES below (the test name refers to
/// CID 2518130) and compares the result with a reference fingerprint decoded
/// from its encoded string form.
/// </summary>
public void TestCID2518130()
{
    const string smiles = "COC1C(C(C(C(O1)CO)OC2C(C(C(C(O2)CO)S)O)O)O)O";
    const string encodedReference =
        "AAADceBwPABAAAAAAAAAAAAAAAAAAAAAAAAkSAAAAAAAAAAAAAAAGgQACAAACBS0wAOCCAAABgQAAAAAAAAAAAAAAAAAAAAAAAAREAIAAAAiQAAFAAAHAAHAYAwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA==";

    var molecule = parser.ParseSmiles(smiles);
    AtomContainerManipulator.PercieveAtomTypesAndConfigureAtoms(molecule);
    CDK.HydrogenAdder.AddImplicitHydrogens(molecule);
    AtomContainerManipulator.ConvertImplicitToExplicitHydrogens(molecule);
    Aromaticity.CDKLegacy.Apply(molecule);

    IFingerprinter fingerprinter = new PubchemFingerprinter();
    BitArray actual = fingerprinter.GetBitFingerprint(molecule).AsBitSet();
    BitArray expected = PubchemFingerprinter.Decode(encodedReference);

    Assert.IsTrue(BitArrays.Equals(expected, actual));
}
/// <summary>
/// Computes the PubChem fingerprints of two molecules directly, then again through
/// <c>FpRunner</c> instances driven by <c>Parallel.ForEach</c>, and asserts that the
/// parallel results are non-null and equal to the directly computed ones.
/// </summary>
public void TestMultithReadedUsage()
{
    var first = parser.ParseSmiles("C=C(C1=CC=C(C=C1)O)NNC2=C(C(=NC(=C2Cl)Cl)C(=O)O)Cl");
    IAtomContainer second = parser.ParseSmiles(
        "C1=CC=C(C=C1)C[N+]2=C(C=C(C=C2C=CC3=CC=CC=C3)C4=CC=CC=C4)C5=CC=CC=C5");

    AtomContainerManipulator.PercieveAtomTypesAndConfigureAtoms(first);
    AtomContainerManipulator.PercieveAtomTypesAndConfigureAtoms(second);

    var hydrogenAdder = CDK.HydrogenAdder;

    hydrogenAdder.AddImplicitHydrogens(first);
    AtomContainerManipulator.ConvertImplicitToExplicitHydrogens(first);
    Aromaticity.CDKLegacy.Apply(first);

    hydrogenAdder.AddImplicitHydrogens(second);
    AtomContainerManipulator.ConvertImplicitToExplicitHydrogens(second);
    Aromaticity.CDKLegacy.Apply(second);

    // Reference fingerprints computed on the calling thread.
    IFingerprinter fingerprinter = new PubchemFingerprinter();
    BitArray expectedFirst = fingerprinter.GetBitFingerprint(first).AsBitSet();
    BitArray expectedSecond = fingerprinter.GetBitFingerprint(second).AsBitSet();

    // Same molecules, fingerprinted through the runners via Parallel.ForEach.
    var runners = new List<FpRunner>
    {
        new FpRunner(first),
        new FpRunner(second)
    };
    var loopResult = Parallel.ForEach(runners, runner => runner.Call());
    Assert.IsTrue(loopResult.IsCompleted);

    BitArray parallelFirst = runners[0].Result;
    Assert.IsNotNull(parallelFirst);
    BitArray parallelSecond = runners[1].Result;
    Assert.IsNotNull(parallelSecond);

    Assert.IsTrue(BitArrays.Equals(expectedFirst, parallelFirst));
    Assert.IsTrue(BitArrays.Equals(expectedSecond, parallelSecond));
}
/// <summary>
/// Checks that <c>GetFingerprintAsBytes()</c> agrees with the bit fingerprint just
/// computed: the bit array is converted to bytes, resized to the length of the
/// returned byte array, and the two arrays are compared.
/// </summary>
public void TestGetFingerprintAsBytes()
{
    var molecule = parser.ParseSmiles("C=C(C1=CC=C(C=C1)O)NNC2=C(C(=NC(=C2Cl)Cl)C(=O)O)Cl");
    AtomContainerManipulator.PercieveAtomTypesAndConfigureAtoms(molecule);
    CDK.HydrogenAdder.AddImplicitHydrogens(molecule);
    AtomContainerManipulator.ConvertImplicitToExplicitHydrogens(molecule);
    Aromaticity.CDKLegacy.Apply(molecule);

    // Declared as the concrete type because GetFingerprintAsBytes() is called on it below.
    PubchemFingerprinter fingerprinter = new PubchemFingerprinter();
    BitArray bits = fingerprinter.GetBitFingerprint(molecule).AsBitSet();

    byte[] actualBytes = fingerprinter.GetFingerprintAsBytes();
    byte[] bitsAsBytes = ToByteArray(bits);
    byte[] expectedBytes = Arrays.CopyOf(bitsAsBytes, actualBytes.Length);

    Assert.IsTrue(Compares.AreEqual(expectedBytes, actualBytes));
}
/// <summary>
/// Fingerprints <c>CC(N)CCCN</c>, <c>CC(N)CCC</c> and <c>CCCC</c> and asserts that
/// <c>FingerprinterTool.IsSubset</c> holds for the first/second pair and for the
/// second/third pair.
/// </summary>
public void Testfp2()
{
    IFingerprinter fingerprinter = new PubchemFingerprinter();

    var diamine = parser.ParseSmiles("CC(N)CCCN");
    var amine = parser.ParseSmiles("CC(N)CCC");
    var butane = parser.ParseSmiles("CCCC");
    var molecules = new[] { diamine, amine, butane };

    // Each preparation step is applied to all three molecules before the next step starts.
    foreach (var molecule in molecules)
    {
        AtomContainerManipulator.PercieveAtomTypesAndConfigureAtoms(molecule);
    }
    foreach (var molecule in molecules)
    {
        Aromaticity.CDKLegacy.Apply(molecule);
    }

    BitArray diamineBits = fingerprinter.GetBitFingerprint(diamine).AsBitSet();
    BitArray amineBits = fingerprinter.GetBitFingerprint(amine).AsBitSet();
    BitArray butaneBits = fingerprinter.GetBitFingerprint(butane).AsBitSet();

    Assert.IsTrue(FingerprinterTool.IsSubset(diamineBits, amineBits));
    Assert.IsTrue(FingerprinterTool.IsSubset(amineBits, butaneBits));
}
/// <summary>
/// Asserts that a default-constructed <c>PubchemFingerprinter</c> reports a
/// fingerprint length of 881.
/// </summary>
public void TestGetSize()
{
    const int expectedLength = 881;

    IFingerprinter fingerprinter = new PubchemFingerprinter();

    Assert.AreEqual(expectedLength, fingerprinter.Length);
}
/// <summary>
/// Passes the one-character string <c>"a"</c> to <c>PubchemFingerprinter.Decode</c>.
/// </summary>
/// <remarks>
/// The body contains no assertion. NOTE(review): presumably an expected-exception
/// attribute declared outside this view turns a throw from <c>Decode</c> into a
/// pass - confirm.
/// </remarks>
public void TestDecode_invalid()
{
    const string malformedInput = "a";
    PubchemFingerprinter.Decode(malformedInput);
}
/// <summary>
/// Builds a bit-set fingerprint of the requested type for a molecule.
/// </summary>
/// <param name="mol">Molecule to fingerprint; must not be <c>null</c>.</param>
/// <param name="fpType">Which fingerprinter to use.</param>
/// <param name="fpSubtype">
/// Only read for <c>FingerprintType.Circular</c>: cast to
/// <c>CircularFingerprinterClass</c>. Values outside the ECFP0..ECFP6 range
/// (including the default -1) are replaced by
/// <c>CircularFingerprintType.DefaultCircularClass</c>.
/// </param>
/// <param name="fpLen">
/// Only read for <c>FingerprintType.Circular</c>: length passed to the
/// <c>CircularFingerprinter</c> constructor. A negative value (the default) selects
/// <c>CircularFingerprintType.DefaultCircularLength</c>.
/// </param>
/// <returns>
/// The result of <c>GetBitFingerprint(mol)</c> on the selected fingerprinter,
/// cast to <c>BitSetFingerprint</c>.
/// </returns>
/// <exception cref="System.ArgumentNullException"><paramref name="mol"/> is <c>null</c>.</exception>
/// <exception cref="System.ArgumentException"><paramref name="fpType"/> is not one of the handled types.</exception>
/// <exception cref="System.InvalidCastException">
/// The selected fingerprinter returned a fingerprint that is not a <c>BitSetFingerprint</c>.
/// </exception>
public static BitSetFingerprint BuildBitSetFingerprint(
    IAtomContainer mol,
    FingerprintType fpType,
    int fpSubtype = -1,
    int fpLen = -1)
{
    // Data for Tanimoto similarity using various fingerprint types for CorpId 123456 query.
    // Cart - Standard MDL Oracle Cartridge scores
    //
    //                           Similarity Score
    //         ------------------------------------------------
    // Size ->        192    896   1024   1024    128   1024    320
    // CorpId  Cart  MACCS  PbChm  ECFP4   EXT   EState Basic  Sbstr
    // ------  ----  -----  -----  -----  -----  ------ -----  -----
    // 123456  0.99   1.00   1.00   1.00   1.00   1.00   1.00   1.00
    // 123456  0.99   0.98   0.96   0.77   0.95   1.00   0.95   1.00
    // 123456  0.99   0.98   0.96   0.77   0.95   1.00   0.94   1.00
    // 123456  0.99   1.00   1.00   1.00   1.00   1.00   1.00   1.00
    // 123456  0.99   1.00   1.00   1.00   1.00   1.00   1.00   1.00
    // 123456  0.99   0.91   1.00   0.81   1.00   1.00   1.00   1.00
    // 123456  0.98   0.95   1.00   0.74   0.92   1.00   0.93   0.94
    // 123456  0.98   1.00   1.00   1.00   1.00   1.00   1.00   1.00
    // 123456  0.98   1.00   1.00   1.00   1.00   1.00   1.00   1.00
    // 123456  0.98   1.00   0.83   0.76   0.77   0.90   0.76   0.94
    //
    // LSH Bin Count - The number of LSH bins (of 25) that match the query bin values
    // --------------
    // CorpId  MAC  PbC  ECFP  EX
    // ------  ---  ---  ----  ---
    // 123456   25   25    25   25
    // 123456   25   20     7   16
    // 123456   25   20     9   19
    // 123456   25   25    25   25
    // 123456   25   25    25   25
    // 123456   20   25     9   25
    // 123456   21   25    11   17
    // 123456   25   25    25   25
    // 123456   25   25    25   25
    // 123456   25    9     6   11
    //
    // Data for Tanimoto similarity using various Circular fingerprint types.
    // Using 2 molecules where the 2nd just has an added methyl group.
    //
    // Measure   Score
    // --------  -----
    // ECFP0      1.00
    // ECFP2       .88
    // ECFP4       .75
    // ECFP6       .64
    // FCFP0      1.00
    // FCFP2       .92
    // FCFP4       .84
    // FCFP6       .74

    if (mol == null)
    {
        throw new ArgumentNullException("mol");
    }

    IFingerprinter ifptr;
    DateTime t0 = DateTime.Now;

    if (fpType == FingerprintType.Basic) // size = 1024
    {
        ifptr = new Fingerprinter();
    }
    else if (fpType == FingerprintType.Circular) // size variable
    {
        CircularFingerprinterClass cfpClass = (CircularFingerprinterClass)fpSubtype;

        // NOTE(review): the table above also lists FCFP scores; if the FCFP members are
        // ordered after ECFP6 in the enum, this range check replaces them with the
        // default class - confirm that is intended.
        if (cfpClass < CircularFingerprinterClass.ECFP0 || cfpClass > CircularFingerprinterClass.ECFP6)
        {
            cfpClass = (CircularFingerprinterClass)CircularFingerprintType.DefaultCircularClass; // default class
        }

        if (fpLen < 0)
        {
            fpLen = CircularFingerprintType.DefaultCircularLength; // default length
        }

        ifptr = new CircularFingerprinter(cfpClass, fpLen);
    }
    else if (fpType == FingerprintType.Extended) // size = 1024
    {
        ifptr = new ExtendedFingerprinter(); // use DEFAULT_SIZE and DEFAULT_SEARCH_DEPTH
    }
    else if (fpType == FingerprintType.EState) // size = 128
    {
        ifptr = new EStateFingerprinter(); // use DEFAULT_SIZE and DEFAULT_SEARCH_DEPTH
    }
    else if (fpType == FingerprintType.MACCS) // size = 192
    {
        // The MACCS fingerprinter is created on first use and then reused.
        if (MACCSFp == null)
        {
            MACCSFp = new MACCSFingerprinter();
        }

        ifptr = MACCSFp;
    }
    else if (fpType == FingerprintType.PubChem) // size = 896
    {
        ifptr = new PubchemFingerprinter();
    }
    else if (fpType == FingerprintType.ShortestPath) // size =
    {
        ifptr = new ShortestPathFingerprinter(); // fails with atom type issue for many structures (e.g. 123456)
    }
    else if (fpType == FingerprintType.Signature) // size =
    {
        ifptr = new SignatureFingerprinter(); // can't convert array fingerprint to bitsetfingerprint
    }
    else if (fpType == FingerprintType.Substructure) // size = 320
    {
        ifptr = new SubstructureFingerprinter();
    }
    else
    {
        // Same message as before, but with a specific exception type instead of bare System.Exception.
        throw new ArgumentException("Invalid CdkFingerprintType: " + fpType);
    }

    // Timing probes: the values are not consumed by the code. They are kept so the
    // TimeOfDay.Delta calls still run exactly as before.
    // NOTE(review): remove both probes if TimeOfDay.Delta has no other effect.
    double getFptrTime = TimeOfDay.Delta(ref t0);

    BitSetFingerprint bfp = (BitSetFingerprint)ifptr.GetBitFingerprint(mol);

    double buildFpTime = TimeOfDay.Delta(ref t0);

    return bfp;
}
/// <summary>
/// Asserts that a <c>PubchemFingerprinter</c> constructed with
/// <c>ChemObjectBuilder.Instance</c> reports a fingerprint length of 881.
/// </summary>
public void TestGetSize()
{
    const int expectedLength = 881;

    IFingerprinter fingerprinter = new PubchemFingerprinter(ChemObjectBuilder.Instance);

    Assert.AreEqual(expectedLength, fingerprinter.Length);
}