コード例 #1
0
        /// <summary>
        /// Asserts that every SMARTS string generated from the circular-fingerprint features of a
        /// molecule is one of the accepted SMARTS strings.
        /// </summary>
        /// <param name="moleculeSmiles">SMILES of the molecule to parse and fingerprint</param>
        /// <param name="expectedFPSmarts">
        /// accepted SMARTS strings; it is a jagged array because several equivalent variants
        /// of each SMARTS may be given, e.g. CCC and C(C)C
        /// </param>
        private static void CheckFPSmartsForMolecule(string moleculeSmiles, string[][] expectedFPSmarts)
        {
            // Flatten all variants into one lookup set.
            var accepted = new HashSet<string>();
            for (int group = 0; group < expectedFPSmarts.Length; group++)
            {
                string[] variants = expectedFPSmarts[group];
                for (int v = 0; v < variants.Length; v++)
                {
                    accepted.Add(variants[v]);
                }
            }

            var molecule = parser.ParseSmiles(moleculeSmiles);

            var fingerprinter = new CircularFingerprinter();
            fingerprinter.Calculate(molecule);

            var extractor = new SmartsFragmentExtractor(molecule);
            extractor.SetMode(SubstructureSelectionMode.JCompoundMapper);

            // Generate one SMARTS per fingerprint feature from the atoms that feature covers.
            var generated = new HashSet<string>();
            int featureCount = fingerprinter.FPCount;
            int index = 0;
            while (index < featureCount)
            {
                var feature = fingerprinter.GetFP(index);
                string smarts = extractor.Generate(feature.Atoms);
                generated.Add(smarts);
                index++;
            }

            // Extra accepted variants that were never generated are allowed; unexpected output is not.
            bool allAccepted = generated.IsSubsetOf(accepted);
            Assert.IsTrue(allAccepted);
        }
コード例 #2
0
        /// <summary>
        /// Formats the fingerprints held by a circular fingerprinter as a tab-delimited table.
        /// </summary>
        /// <remarks>
        /// The first line is the header "fp, hashCode, iteration, atoms"; each following line holds the
        /// fingerprint index, its hash, its iteration and the comma-separated atom list in parentheses.
        /// Every line ends with CR LF.
        /// </remarks>
        /// <param name="cfp">
        /// fingerprinter whose fingerprints are listed (presumably already calculated for a molecule by the caller)
        /// </param>
        /// <returns>the formatted table, including the header line</returns>
        public static string CircularFpToString(CircularFingerprinter cfp)
        {
            // A StringBuilder avoids re-copying the whole accumulated text for every row,
            // which repeated string concatenation in the loop would do.
            var text = new System.Text.StringBuilder("fp\thashCode\titeration\tatoms\r\n");

            int fpCount = cfp.FPCount;

            for (int fpi = 0; fpi < fpCount; fpi++)
            {
                CircularFingerprint fp = cfp.GetFP(fpi);

                text.Append(fpi).Append('\t')
                    .Append(fp.Hash).Append('\t')
                    .Append(fp.Iteration).Append("\t(")
                    .Append(string.Join(", ", fp.Atoms))
                    .Append(")\r\n");
            }

            return text.ToString();
        }
コード例 #3
0
ファイル: Bayesian.cs プロジェクト: roddickchen/NCDK
        /// <summary>
        /// Appends a new row to the model source data, which consists of a molecule and whether or not it
        /// is considered active.
        /// </summary>
        /// <param name="mol">molecular structure, which must be non-blank</param>
        /// <param name="active">whether active or not</param>
        public void AddMolecule(IAtomContainer mol, bool active)
        {
            if (mol == null || mol.Atoms.Count == 0)
            {
                throw new CDKException("Molecule cannot be blank or null.");
            }

            var circ = new CircularFingerprinter(ClassType)
            {
                PerceiveStereo = this.PerceiveStereo
            };

            circ.Calculate(mol);

            // gather all of the (folded) fingerprints into a sorted set
            int AND_BITS = Folding - 1; // e.g. 1024/0x400 -> 1023/0x3FF: chop off higher order bits
            var hashset  = new SortedSet <int>();

            for (int n = circ.FPCount - 1; n >= 0; n--)
            {
                int code = circ.GetFP(n).Hash;
                if (Folding > 0)
                {
                    code &= AND_BITS;
                }
                hashset.Add(code);
            }

            // convert the set into a sorted primitive array
            var hashes = new int[hashset.Count];
            int p      = 0;

            foreach (var h in hashset)
            {
                hashes[p++] = h;
            }

            // record the processed information for model building purposes
            if (active)
            {
                numActive++;
            }
            training.Add(hashes);
            activity.Add(active);
            foreach (var h in hashes)
            {
                if (!inHash.TryGetValue(h, out int[] stash))
コード例 #4
0
        /// <summary>
        /// Manual smoke test: reads the molecules in a hard-coded structure file and runs the
        /// count, per-feature and bit fingerprint calls of <see cref="CircularFingerprinter"/> on each.
        /// </summary>
        /// <remarks>
        /// Nothing is returned or asserted; the locals exist so the results can be inspected in a debugger.
        /// </remarks>
        public static void BuildTest()
        {
            CircularFingerprinterClass FpClass = CircularFingerprinterClass.ECFP4; // FP diameter (ECFP4 for debugging)
            int FpLen = 2048;                                                      // folded binary fp length

            CircularFingerprinter cfp = new CircularFingerprinter(FpClass, FpLen);

            // The using statement guarantees the file handle is released even when reading or
            // fingerprinting throws; disposing the StreamReader also closes its BaseStream.
            using (StreamReader reader = new StreamReader(@"C:\Download\CorpId-12345.mol"))
            {
                EnumerableSDFReader rdr = new EnumerableSDFReader(reader.BaseStream, ChemObjectBuilder.Instance);

                rdr.ReaderMode = ChemObjectReaderMode.Relaxed;

                // foreach disposes the enumerator when the loop ends
                foreach (IAtomContainer entry in rdr)
                {
                    IAtomContainer mol = CdkMol.GetLargestMoleculeFragment(entry);

                    ICountFingerprint cfp1 = cfp.GetCountFingerprint(mol); // get hash values and counts for each

                    cfp.Calculate(mol);
                    int fpCount = cfp.FPCount;
                    for (int fpi = 0; fpi < fpCount; fpi++)
                    {
                        CircularFingerprint cfp2 = cfp.GetFP(fpi); // gets hash, iteration and lists of atoms (dups appear multiple times)
                    }

                    IBitFingerprint bfp    = cfp.GetBitFingerprint(mol);
                    BitArray        bs     = bfp.AsBitSet();
                    int             bsCard = bfp.Cardinality;
                    long            bsSize = bfp.Length;
                }
            }
        }