/// <summary>
/// Asserts that every SMARTS string generated for the circular fingerprints of a molecule
/// is contained in the set of expected SMARTS strings.
/// </summary>
/// <param name="moleculeSmiles">SMILES string of the molecule to fingerprint.</param>
/// <param name="expectedFPSmarts">
/// Expected SMARTS strings. This is a jagged array because several equivalent variants
/// are given for each SMARTS, e.g. CCC and C(C)C.
/// </param>
private static void CheckFPSmartsForMolecule(string moleculeSmiles, string[][] expectedFPSmarts)
{
    // Flatten every variant of every expected SMARTS into a single lookup set.
    var allowed = new HashSet<string>();
    for (int group = 0; group < expectedFPSmarts.Length; group++)
    {
        string[] variants = expectedFPSmarts[group];
        for (int v = 0; v < variants.Length; v++)
        {
            allowed.Add(variants[v]);
        }
    }

    var molecule = parser.ParseSmiles(moleculeSmiles);

    var fingerprinter = new CircularFingerprinter();
    fingerprinter.Calculate(molecule);

    var extractor = new SmartsFragmentExtractor(molecule);
    extractor.SetMode(SubstructureSelectionMode.JCompoundMapper);

    // Generate one SMARTS per fingerprint, in fingerprint order.
    var produced = new HashSet<string>();
    int total = fingerprinter.FPCount;
    int index = 0;
    while (index < total)
    {
        var fingerprint = fingerprinter.GetFP(index);
        string smarts = extractor.Generate(fingerprint.Atoms);
        produced.Add(smarts);
        index++;
    }

    // Every generated SMARTS must be one of the expected variants.
    Assert.IsTrue(produced.IsSubsetOf(allowed));
}
/// <summary>
/// Formats the fingerprints held by a <see cref="CircularFingerprinter"/> as tab-separated text.
/// </summary>
/// <remarks>
/// The first line is the header <c>fp, hashCode, iteration, atoms</c> (tab-separated). Each following
/// line holds the fingerprint index, its hash, its iteration and its atoms joined with ", " inside
/// parentheses. Every line, including the header, ends with CR LF.
/// </remarks>
/// <param name="cfp">Fingerprinter whose fingerprints are listed; its FPCount and GetFP members are read.</param>
/// <returns>The formatted table; only the header line when there are no fingerprints.</returns>
public static string CircularFpToString(CircularFingerprinter cfp)
{
    // A builder avoids re-copying the whole accumulated text for every appended row.
    var text = new System.Text.StringBuilder("fp\thashCode\titeration\tatoms\r\n");

    int fpCount = cfp.FPCount;
    for (int fpi = 0; fpi < fpCount; fpi++)
    {
        CircularFingerprint fp = cfp.GetFP(fpi);
        text.Append(fpi)
            .Append("\t")
            .Append(fp.Hash)
            .Append("\t")
            .Append(fp.Iteration)
            .Append("\t(")
            .Append(string.Join(", ", fp.Atoms))
            .Append(")\r\n");
    }

    return text.ToString();
}
/// <summary> /// Appends a new row to the model source data, which consists of a molecule and whether or not it /// is considered active. /// </summary> /// <param name="mol">molecular structure, which must be non-blank</param> /// <param name="active">whether active or not</param> public void AddMolecule(IAtomContainer mol, bool active) { if (mol == null || mol.Atoms.Count == 0) { throw new CDKException("Molecule cannot be blank or null."); } var circ = new CircularFingerprinter(ClassType) { PerceiveStereo = this.PerceiveStereo }; circ.Calculate(mol); // gather all of the (folded) fingerprints into a sorted set int AND_BITS = Folding - 1; // e.g. 1024/0x400 -> 1023/0x3FF: chop off higher order bits var hashset = new SortedSet <int>(); for (int n = circ.FPCount - 1; n >= 0; n--) { int code = circ.GetFP(n).Hash; if (Folding > 0) { code &= AND_BITS; } hashset.Add(code); } // convert the set into a sorted primitive array var hashes = new int[hashset.Count]; int p = 0; foreach (var h in hashset) { hashes[p++] = h; } // record the processed information for model building purposes if (active) { numActive++; } training.Add(hashes); activity.Add(active); foreach (var h in hashes) { if (!inHash.TryGetValue(h, out int[] stash))
/// <summary>
/// BuildTest: reads the molecules in a hard-coded structure file and, for each one, runs the
/// count-fingerprint, raw circular-fingerprint and bit-fingerprint calls of a
/// <see cref="CircularFingerprinter"/>.
/// </summary>
/// <remarks>
/// Nothing is returned or stored: the intermediate values are only held in locals, so the routine
/// is useful for stepping through in a debugger or for checking that the calls complete without
/// throwing. The input path is fixed to <c>C:\Download\CorpId-12345.mol</c>.
/// </remarks>
public static void BuildTest()
{
    CircularFingerprinterClass FpClass = CircularFingerprinterClass.ECFP4; // FP diameter (debug setting)
    int FpLen = 2048; // folded binary fp length

    CircularFingerprinter cfp = new CircularFingerprinter(FpClass, FpLen);

    // The using blocks release the file handle and the enumerator even if reading or
    // fingerprinting throws part-way through.
    using (StreamReader reader = new StreamReader(@"C:\Download\CorpId-12345.mol"))
    {
        EnumerableSDFReader rdr = new EnumerableSDFReader(reader.BaseStream, ChemObjectBuilder.Instance);
        rdr.ReaderMode = ChemObjectReaderMode.Relaxed;

        using (IEnumerator<IAtomContainer> cursor = rdr.GetEnumerator())
        {
            while (cursor.MoveNext())
            {
                IAtomContainer mol = CdkMol.GetLargestMoleculeFragment(cursor.Current);

                ICountFingerprint cfp1 = cfp.GetCountFingerprint(mol); // get hash values and counts for each

                cfp.Calculate(mol);
                int fpCount = cfp.FPCount;
                for (int fpi = 0; fpi < fpCount; fpi++)
                {
                    // gets hash, iteration and lists of atoms (dups appear multiple times)
                    CircularFingerprint cfp2 = cfp.GetFP(fpi);
                }

                // Folded bit fingerprint plus its derived values, kept in locals for inspection.
                IBitFingerprint bfp = cfp.GetBitFingerprint(mol);
                BitArray bs = bfp.AsBitSet();
                int bsCard = bfp.Cardinality;
                long bsSize = bfp.Length;
            }
        }
    }
}