/// <summary>
        /// Checks that a pharmacophore query made of three "BasicAmine" groups joined by one
        /// angle constraint (value 89.14) matches the first molecule of the embedded
        /// <c>NCDK.Data.MDL.cnssmarts.sdf</c> resource.
        /// </summary>
        public void TestAngleMatch2()
        {
            var filename = "NCDK.Data.MDL.cnssmarts.sdf";

            var query = new PharmacophoreQuery();
            var n1    = new PharmacophoreQueryAtom("BasicAmine", "[NX3;h2,h1,H1,H2;!$(NC=O)]");
            var n2    = new PharmacophoreQueryAtom("BasicAmine", "[NX3;h2,h1,H1,H2;!$(NC=O)]");
            var n3    = new PharmacophoreQueryAtom("BasicAmine", "[NX3;h2,h1,H1,H2;!$(NC=O)]");
            var b1    = new PharmacophoreQueryAngleBond(n1, n2, n3, 89.14);

            query.Atoms.Add(n1);
            query.Atoms.Add(n2);
            query.Atoms.Add(n3);
            query.Bonds.Add(b1);

            var matcher = new PharmacophoreMatcher(query);
            bool status;

            // The using blocks release the stream and the reader even when reading or
            // matching throws; an explicit Close() at the end would be skipped in that case.
            using (var ins = ResourceLoader.GetAsStream(filename))
            using (var reader = new EnumerableSDFReader(ins, CDK.Builder))
            {
                status = matcher.Matches(reader.First());
            }

            Assert.IsTrue(status);
        }
Beispiel #2
0
        /// <summary>
        /// Reads every molecule of an SD file, generates a SMILES string for each one and
        /// writes the strings, one per line, to <paramref name="dataFileOut"/>.
        /// </summary>
        /// <remarks>
        /// Progress and failures are reported on the console rather than thrown. A molecule
        /// whose SMILES generation fails with a <c>CDKException</c> or <see cref="IOException"/>
        /// is skipped; any other failure aborts reading, and whatever was collected so far
        /// is still written out.
        /// </remarks>
        /// <param name="dataFileIn">Path of the SD file to read.</param>
        /// <param name="dataFileOut">Path of the text file that receives the SMILES strings.</param>
        public static void MakeCanonicalSmileFromRingSystems(string dataFileIn, string dataFileOut)
        {
            Console.Out.WriteLine("Start make SMILES...");
            var data   = new List<string>();
            var smiles = new SmilesGenerator();

            try
            {
                Console.Out.WriteLine("Start...");
                // The StreamReader gets its own using so it is released even if the
                // SDF reader's constructor throws.
                using (var fin = new StreamReader(dataFileIn))
                using (var imdl = new EnumerableSDFReader(fin, builder))
                {
                    Console.Out.WriteLine("Read File in..");

                    foreach (var m in imdl)
                    {
                        try
                        {
                            data.Add(smiles.Create(builder.NewAtomContainer(m)));
                        }
                        catch (Exception exc1) when (exc1 is CDKException || exc1 is IOException)
                        {
                            // Per-molecule failure: report it and carry on with the next record.
                            Console.Out.WriteLine("Could not create smile due to: " + exc1.Message);
                        }
                    }
                }
            }
            catch (Exception exc)
            {
                Console.Out.WriteLine("Could not read Molecules from file " + dataFileIn + " due to: " + exc.Message);
            }

            Console.Out.Write("...ready\nWrite data...");
            try
            {
                using (var fout = new StreamWriter(dataFileOut))
                {
                    // A write failure is no longer swallowed per line: it reaches the catch
                    // below, which reports it instead of leaving a silently truncated file.
                    foreach (var smi in data)
                    {
                        fout.WriteLine(smi);
                    }
                    Console.Out.WriteLine($"number of smiles: {data.Count}");
                }
            }
            catch (Exception exc3)
            {
                Console.Out.WriteLine($"Could not write smile in file {dataFileOut} due to: {exc3.Message}");
            }
            Console.Out.WriteLine("...ready");
        }
Beispiel #3
0
        /// <summary>
        /// Reads the SD file <paramref name="dataFile"/>, copies every ring returned by
        /// <c>Cycles.FindSSSR</c> for every molecule into its own atom container, and passes
        /// the collected containers to <c>WriteChemModel</c> with the suffix "_VERSUCH".
        /// </summary>
        /// <remarks>
        /// A read failure is reported on the console; the containers gathered up to that
        /// point are still handed to <c>WriteChemModel</c>.
        /// </remarks>
        /// <param name="dataFile">Path of the SD file to read.</param>
        public static void PartitionRingsFromComplexRing(string dataFile)
        {
            var ringContainers = builder.NewAtomContainerSet();

            try
            {
                Console.Out.WriteLine("Start...");
                using (var input = new StreamReader(dataFile))
                {
                    using (var sdfReader = new EnumerableSDFReader(input, builder))
                    {
                        Console.Out.Write("Read File in..");
                        Console.Out.WriteLine("READY");
                        foreach (var molecule in sdfReader)
                        {
                            Console.Out.WriteLine($"Atoms: {molecule.Atoms.Count}");
                            IRingSet rings = Cycles.FindSSSR(molecule).ToRingSet();

                            // One new container per ring of the current molecule.
                            int ringIndex = 0;
                            while (ringIndex < rings.Count)
                            {
                                ringContainers.Add(builder.NewAtomContainer(rings[ringIndex]));
                                ringIndex++;
                            }
                        }
                    }
                }
            }
            catch (Exception readError)
            {
                Console.Out.WriteLine($"Could not read Molecules from file {dataFile} due to: {readError.Message}");
            }

            Console.Out.WriteLine($"{ringContainers.Count} Templates are read in");
            WriteChemModel(ringContainers, dataFile, "_VERSUCH");
        }
Beispiel #4
0
        // ----------------- private methods -----------------

        /// <summary>
        /// Makes sure that, for a single molecule, the way the hashes are created and folded
        /// is consistent with a reference.
        /// </summary>
        /// <param name="molstr">Text of the molecule record; the first record read by <c>EnumerableSDFReader</c> is used.</param>
        /// <param name="classType">Fingerprint class handed to the <c>Bayesian</c> model.</param>
        /// <param name="folding">Folding size handed to the <c>Bayesian</c> model.</param>
        /// <param name="refHash">Expected hash codes, compared element by element.</param>
        /// <exception cref="CDKException">The calculated hashes differ from <paramref name="refHash"/>.</exception>
        private static void CheckFP(string molstr, CircularFingerprinterClass classType, int folding, int[] refHash)
        {
            var strType = classType == CircularFingerprinterClass.ECFP6 ? "ECFP6" : "FCFP6";

            WriteLine($"Comparing hash codes for {strType}/folding={folding}");

            var model = new Bayesian(classType, folding);

            // Dispose the reader (and its text source) once the single molecule is read.
            using (var input = new StringReader(molstr))
            using (var reader = new EnumerableSDFReader(input, ChemObjectBuilder.Instance))
            {
                model.AddMolecule(reader.First(), false);
            }

            var calcHash = model.Training[0];

            // Equal only when the lengths agree and every position agrees; stop at the first mismatch.
            var same = calcHash.Length == refHash.Length;
            for (int n = 0; same && n < calcHash.Length; n++)
            {
                same = calcHash[n] == refHash[n];
            }

            if (!same)
            {
                WriteLine($"    ** calculated: {ArrayStr(calcHash)}");
                WriteLine($"    ** reference:  {ArrayStr(refHash)}");
                throw new CDKException("Hashes differ.");
            }
        }
        /// <summary>
        /// Checks that a two-group query (group "A" with SMARTS <c>c1ccccc1</c> and a
        /// "BasicAmine", joined by a distance constraint of 5.0 to 7.0) matches the first molecule of
        /// the embedded <c>NCDK.Data.MDL.cnssmarts.sdf</c> resource exactly once, both for
        /// all matches and for unique matches.
        /// </summary>
        public void TestCNSPcore()
        {
            var filename = "NCDK.Data.MDL.cnssmarts.sdf";

            var query = new PharmacophoreQuery();
            var arom  = new PharmacophoreQueryAtom("A", "c1ccccc1");
            var n1    = new PharmacophoreQueryAtom("BasicAmine", "[NX3;h2,h1,H1,H2;!$(NC=O)]");
            var b1    = new PharmacophoreQueryBond(arom, n1, 5.0, 7.0);

            query.Atoms.Add(arom);
            query.Atoms.Add(n1);
            query.Bonds.Add(b1);

            var matcher = new PharmacophoreMatcher(query);
            bool status;

            // The using blocks release the stream and the reader even when reading or
            // matching throws; an explicit Close() at the end would be skipped in that case.
            using (var ins = ResourceLoader.GetAsStream(filename))
            using (var reader = new EnumerableSDFReader(ins, CDK.Builder))
            {
                status = matcher.Matches(reader.First());
            }

            Assert.IsTrue(status);

            var pmatches = matcher.GetMatchingPharmacophoreAtoms();

            Assert.AreEqual(1, pmatches.Count);

            var upmatches = matcher.GetUniqueMatchingPharmacophoreAtoms();

            Assert.AreEqual(1, upmatches.Count);
        }
Beispiel #6
0
 /// <summary>
 /// Loads the ring templates: opens the gzip-compressed embedded resource named by
 /// <c>TemplatePath</c>, reads it as SD records and passes every molecule to
 /// <c>AddTemplateMol</c>.
 /// </summary>
 /// <exception cref="CDKException">The template file cannot be loaded</exception>
 private void LoadTemplates()
 {
     try
     {
         using (var gin = GetType().Assembly.GetManifestResourceStream(GetType(), TemplatePath))
         {
             // GetManifestResourceStream returns null for a missing resource; without this
             // check the failure would surface as ArgumentNullException from GZipStream
             // instead of the documented CDKException.
             if (gin == null)
             {
                 throw new CDKException("Could not load ring templates: resource '" + TemplatePath + "' not found");
             }

             using (var ins = new GZipStream(gin, CompressionMode.Decompress))
             using (var sdfr = new EnumerableSDFReader(ins, builder))
             {
                 foreach (var mol in sdfr)
                 {
                     AddTemplateMol(mol);
                 }
             }
         }
     }
     // InvalidDataException is what GZipStream raises for corrupt data; it is not an IOException.
     catch (Exception e) when (e is IOException || e is InvalidDataException)
     {
         throw new CDKException("Could not load ring templates", e);
     }
 }
Beispiel #7
0
        /// <summary>
        /// Checks that the aromatic-ring / basic-amine distance query matches the first
        /// molecule of the embedded <c>NCDK.Data.MDL.cnssmarts.sdf</c> resource once, and that
        /// the single matched pharmacophore bond has a length of 5.63 (tolerance 0.01).
        /// </summary>
        public void TestMatchingBonds()
        {
            var filename = "NCDK.Data.MDL.cnssmarts.sdf";

            var query = new PharmacophoreQuery();
            var arom  = new PharmacophoreQueryAtom("A", "c1ccccc1");
            var n1    = new PharmacophoreQueryAtom("BasicAmine", "[NX3;h2,h1,H1,H2;!$(NC=O)]");
            var b1    = new PharmacophoreQueryBond(arom, n1, 5.0, 7.0);

            query.Atoms.Add(arom);
            query.Atoms.Add(n1);
            query.Bonds.Add(b1);

            IAtomContainer mol;

            // The using blocks release the stream and the reader even when reading throws;
            // an explicit Close() after First() would be skipped in that case.
            using (var ins = ResourceLoader.GetAsStream(filename))
            using (var reader = new EnumerableSDFReader(ins, CDK.Builder))
            {
                mol = (IAtomContainer)reader.First();
            }

            var matcher = new PharmacophoreMatcher(query);
            bool status = matcher.Matches(mol);

            Assert.IsTrue(status);

            var pmatches = matcher.GetMatchingPharmacophoreAtoms();

            Assert.AreEqual(1, pmatches.Count);

            var upmatches = matcher.GetUniqueMatchingPharmacophoreAtoms();

            Assert.AreEqual(1, upmatches.Count);

            var bmatches = matcher.GetMatchingPharmacophoreBonds();

            Assert.AreEqual(1, bmatches.Count);
            var bmatch = bmatches[0];

            Assert.AreEqual(1, bmatch.Count);
            PharmacophoreBond pbond = (PharmacophoreBond)BondRef.Deref(bmatch[0]);

            Assert.AreEqual(5.63, pbond.BondLength, 0.01);
        }
Beispiel #8
0
        /// <summary>
        /// Compares a series of molecules for folded fingerprints being literally identical
        /// to the reference string stored with each molecule.
        /// </summary>
        /// <param name="sdfile">Name of the embedded resource below <c>NCDK.Data.CDD.</c>.</param>
        /// <param name="fpField">Molecule property that holds the expected hash string.</param>
        /// <param name="classType">Fingerprint class handed to the <c>Bayesian</c> model.</param>
        /// <param name="folding">Folding size handed to the <c>Bayesian</c> model.</param>
        private static void CompareFolding(string sdfile, string fpField, CircularFingerprinterClass classType, int folding)
        {
            WriteLine($"[{sdfile}] calculation of: {fpField}");

            // The reader is disposed together with the stream, as RunTest does.
            using (var ins = ResourceLoader.GetAsStream($"NCDK.Data.CDD.{sdfile}"))
            using (var rdr = new EnumerableSDFReader(ins, ChemObjectBuilder.Instance))
            {
                int row = 0;
                foreach (var mol in rdr)
                {
                    row++;

                    // A fresh model per molecule, so Training[0] is this molecule's hash list.
                    var model = new Bayesian(classType, folding);
                    model.AddMolecule(mol, false);

                    var hashes    = model.Training[0];
                    var gotHashes = ArrayStr(hashes);
                    var reqHashes = (string)mol.GetProperties()[fpField];
                    Assert.AreEqual(reqHashes, gotHashes, $"Folded hashes do not match reference at {row}.");
                }
            }
        }
        /// <summary>
        /// Checks a query whose group "A" is defined by two alternative SMARTS patterns
        /// (<c>c1ccccc1|C1CCCC1</c>) against the first three molecules of the embedded
        /// <c>NCDK.Data.PCore.multismartpcore.sdf</c> resource: the first two must match
        /// (with one and two unique matches respectively), the third must not.
        /// </summary>
        public void MultiSmartsQuery()
        {
            var query = new PharmacophoreQuery();
            var rings = new PharmacophoreQueryAtom("A", "c1ccccc1|C1CCCC1");
            var o1    = new PharmacophoreQueryAtom("Hd", "[OX1]");
            var b1    = new PharmacophoreQueryBond(rings, o1, 3.5, 5.8);

            query.Atoms.Add(rings);
            query.Atoms.Add(o1);
            query.Bonds.Add(b1);

            var matcher = new PharmacophoreMatcher();

            matcher.SetPharmacophoreQuery(query);

            var filename = "NCDK.Data.PCore.multismartpcore.sdf";

            // The using blocks release stream, reader and enumerator even when an assertion fails.
            using (var ins = ResourceLoader.GetAsStream(filename))
            using (var reader = new EnumerableSDFReader(ins, CDK.Builder))
            using (var enumerator = reader.GetEnumerator())
            {
                // MoveNext() is asserted each time: Current is undefined once it returns
                // false, so a short file must fail here rather than test stale data.
                Assert.IsTrue(enumerator.MoveNext(), "Expected a first molecule in " + filename);
                var mol = enumerator.Current;

                Assert.IsTrue(matcher.Matches(mol));
                var unique = matcher.GetUniqueMatchingPharmacophoreAtoms();
                Assert.AreEqual(1, unique.Count);
                Assert.AreEqual(2, unique[0].Count);

                Assert.IsTrue(enumerator.MoveNext(), "Expected a second molecule in " + filename);
                mol = enumerator.Current;

                Assert.IsTrue(matcher.Matches(mol));
                unique = matcher.GetUniqueMatchingPharmacophoreAtoms();
                Assert.AreEqual(2, unique.Count);
                Assert.AreEqual(2, unique[0].Count);
                Assert.AreEqual(2, unique[1].Count);

                Assert.IsTrue(enumerator.MoveNext(), "Expected a third molecule in " + filename);
                mol = enumerator.Current;

                Assert.IsFalse(matcher.Matches(mol));
            }
        }
Beispiel #10
0
        /// <summary>
        /// Reads the SD file <paramref name="dataFile"/>, keeps the molecules that have more
        /// than two atoms and whose first atom has a non-null <c>Point3D</c>, and passes the
        /// kept molecules to <c>WriteChemModel</c> with the suffix "_CLEAN".
        /// </summary>
        /// <remarks>
        /// A read failure is reported on the console; the molecules kept up to that point
        /// are still handed to <c>WriteChemModel</c>.
        /// </remarks>
        /// <param name="dataFile">Path of the SD file to read.</param>
        public static void CleanDataSet(string dataFile)
        {
            var kept = builder.NewAtomContainerSet();

            try
            {
                Console.Out.WriteLine("Start clean dataset...");
                using (var input = new StreamReader(dataFile))
                {
                    using (var sdfReader = new EnumerableSDFReader(input, builder))
                    {
                        Console.Out.WriteLine("READY");
                        int seen = 0;
                        foreach (var molecule in sdfReader)
                        {
                            seen++;

                            // Progress marker every 1000 molecules.
                            if (seen % 1000 == 0)
                            {
                                Console.Out.WriteLine("...");
                            }

                            if (molecule.Atoms.Count <= 2)
                            {
                                continue;
                            }
                            if (molecule.Atoms[0].Point3D == null)
                            {
                                continue;
                            }

                            kept.Add(molecule);
                        }
                    }
                }
                Console.Out.Write("Read File in..");
            }
            catch (Exception readError)
            {
                Console.Out.WriteLine($"Could not read Molecules from file {dataFile} due to: {readError.Message}");
            }

            Console.Out.WriteLine($"{kept.Count} Templates are read in");
            WriteChemModel(kept, dataFile, "_CLEAN");
        }
Beispiel #11
0
        /// <summary>
        /// Reads every molecule of the SD file <paramref name="dataFile"/> into an atom
        /// container set and prints how many were read. The set itself is not returned.
        /// </summary>
        /// <remarks>A read failure is reported on the console instead of being thrown.</remarks>
        /// <param name="dataFile">Path of the SD file to read.</param>
        public static void ReadNCISdfFileAsTemplate(string dataFile)
        {
            var templates = builder.NewAtomContainerSet();

            try
            {
                Console.Out.WriteLine("Start...");
                using (var input = new StreamReader(dataFile))
                {
                    using (var sdfReader = new EnumerableSDFReader(input, builder))
                    {
                        Console.Out.Write("Read File in..");
                        Console.Out.WriteLine("READY");
                        foreach (var molecule in sdfReader)
                        {
                            templates.Add(molecule);
                        }
                    }
                }
            }
            catch (Exception readError)
            {
                Console.Out.WriteLine($"Could not read Molecules from file {dataFile} due to: {readError.Message}");
            }

            Console.Out.WriteLine(templates.Count + " Templates are read in");
        }
Beispiel #12
0
        /// <summary>
        /// Reads molecules from an SD source and produces one bit fingerprint per molecule
        /// using a <c>HybridizationFingerprinter</c> with pseudo-atom hashing enabled.
        /// </summary>
        /// <remarks>
        /// When fingerprinting a molecule throws, a fingerprint with every bit set is stored
        /// in its place, so the list always holds exactly one entry per molecule processed.
        /// A failure while reading is reported on the console and the fingerprints collected
        /// so far are returned.
        /// </remarks>
        /// <param name="anyAtom">
        /// When true and <paramref name="anyAtomAnyBond"/> is false, the molecule is converted with
        /// <c>QueryAtomContainerCreator.CreateAnyAtomContainer</c>; otherwise with
        /// <c>AtomContainerManipulator.Anonymise</c>.
        /// </param>
        /// <param name="anyAtomAnyBond">See <paramref name="anyAtom"/>.</param>
        /// <param name="timings">
        /// Histogram updated in place: the key is the elapsed fingerprinting time in whole
        /// 10 ms steps (invariant-culture text), the value is the number of molecules in
        /// that bin. Only successful fingerprint calculations are counted. May be null, in
        /// which case no timings are recorded.
        /// </param>
        /// <param name="fin">Source of the SD data.</param>
        /// <param name="limit">Maximum number of molecules to process, or -1 for no limit.</param>
        /// <returns>The fingerprints, in the order the molecules were read.</returns>
        public static IReadOnlyList<IBitFingerprint> MakeFingerprintsFromSdf(bool anyAtom, bool anyAtomAnyBond, Dictionary<string, int> timings, TextReader fin, int limit)
        {
            var fingerPrinter = new HybridizationFingerprinter(HybridizationFingerprinter.DefaultSize, HybridizationFingerprinter.DefaultSearchDepth);

            fingerPrinter.SetHashPseudoAtoms(true);
            var data = new List<IBitFingerprint>();

            try
            {
                Trace.TraceInformation("Read data file in ...");
                using (var imdl = new EnumerableSDFReader(fin, builder))
                {
                    Trace.TraceInformation("ready");

                    int moleculeCounter    = 0;
                    int fingerprintCounter = 0;
                    Trace.TraceInformation($"Generated Fingerprints: {fingerprintCounter}    ");
                    foreach (var m in imdl)
                    {
                        if (limit != -1 && moleculeCounter >= limit)
                        {
                            break;
                        }
                        moleculeCounter++;

                        IAtomContainer query;
                        if (anyAtom && !anyAtomAnyBond)
                        {
                            query = QueryAtomContainerCreator.CreateAnyAtomContainer(m, false);
                        }
                        else
                        {
                            query = AtomContainerManipulator.Anonymise(m);
                        }

                        IBitFingerprint fingerprint;
                        long elapsedMs = -1; // stays negative when the calculation failed
                        try
                        {
                            // Stopwatch is monotonic, unlike differences of DateTime.Now.
                            var stopwatch = Stopwatch.StartNew();
                            fingerprint = fingerPrinter.GetBitFingerprint(query);
                            elapsedMs = stopwatch.ElapsedMilliseconds;
                        }
                        catch (Exception exc1)
                        {
                            Trace.TraceInformation($"QueryFingerprintError: from molecule:{moleculeCounter} due to:{exc1.Message}");

                            // OK, just adds a fingerprint with all ones, so that any
                            // structure will match this template, and leave it up
                            // to substructure match to figure things out
                            var allOnesFingerprint = new BitSetFingerprint(fingerPrinter.Length);
                            for (int i = 0; i < fingerPrinter.Length; i++)
                            {
                                allOnesFingerprint.Set(i);
                            }
                            fingerprint = allOnesFingerprint;
                        }

                        // Exactly one entry per molecule. The timing bookkeeping below is
                        // kept out of the try block so that a failure there can never add a
                        // second (fallback) fingerprint for the same molecule.
                        data.Add(fingerprint);
                        fingerprintCounter++;

                        if (elapsedMs >= 0 && timings != null)
                        {
                            var bin = (elapsedMs / 10).ToString(NumberFormatInfo.InvariantInfo);
                            int seen;
                            timings.TryGetValue(bin, out seen); // seen stays 0 for a new bin
                            timings[bin] = seen + 1;
                        }

                        if (fingerprintCounter % 2 == 0)
                        {
                            Trace.TraceInformation("\b" + "/");
                        }
                        else
                        {
                            Trace.TraceInformation("\b" + "\\");
                        }

                        if (fingerprintCounter % 100 == 0)
                        {
                            Trace.TraceInformation("\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b"
                                                   + "Generated Fingerprints: " + fingerprintCounter + "   \n");
                        }
                    }
                    Trace.TraceInformation($"...ready with:{moleculeCounter} molecules\nWrite data...of data vector:{data.Count} fingerprintCounter:{fingerprintCounter}");
                }
            }
            catch (Exception exc)
            {
                Console.Out.WriteLine("Could not read Molecules from file" + " due to: " + exc.Message);
            }
            return data;
        }
Beispiel #13
0
        /// <summary>
        /// Extracts the ring systems of every molecule in the SD file
        /// <paramref name="dataFile"/>, replaces every atom symbol with "C" so that only
        /// connectivity counts, and writes each ring system whose SMILES has not been seen
        /// before to the file <c>dataFile + "_UniqueRings"</c>.
        /// </summary>
        /// <remarks>
        /// Read and write failures are reported on the console. If SMILES generation throws
        /// a <c>CDKException</c> the error is traced and the method returns immediately,
        /// without printing the final summary. A ring system that cannot be written
        /// (<see cref="ArgumentException"/> or <c>CDKException</c>) is traced and skipped,
        /// but still counted as unique.
        /// </remarks>
        /// <param name="dataFile">Path of the SD file to read.</param>
        public static void ExtractUniqueRingSystemsFromFile(string dataFile)
        {
            Console.Out.WriteLine("****** EXTRACT UNIQUE RING SYSTEMS ******");
            Console.Out.WriteLine($"From file: {dataFile}");

            // Only membership matters, so a set replaces the former string-to-"1" dictionary.
            var seenRingSystems = new HashSet<string>();
            var smilesGenerator = new SmilesGenerator();

            int counterRings       = 0;
            int counterMolecules   = 0;
            int counterUniqueRings = 0;

            string molfile = dataFile + "_UniqueRings";

            try
            {
                using (var fout = new FileStream(molfile, FileMode.Create))
                using (var mdlw = new MDLV2000Writer(fout))
                {
                    try
                    {
                        Console.Out.WriteLine("Start...");
                        using (var fin = new StreamReader(dataFile))
                        using (var imdl = new EnumerableSDFReader(fin, builder))
                        {
                            Console.Out.WriteLine("Read File in..");

                            foreach (var m in imdl)
                            {
                                counterMolecules++;

                                IRingSet ringSetM = Cycles.FindSSSR(m).ToRingSet();

                                if (counterMolecules % 1000 == 0)
                                {
                                    Console.Out.WriteLine("Molecules:" + counterMolecules);
                                }

                                if (ringSetM.Count == 0)
                                {
                                    continue;
                                }

                                var ringSystems = RingPartitioner.PartitionRings(ringSetM);
                                for (int i = 0; i < ringSystems.Count; i++)
                                {
                                    // Merge all rings of this ring system into one container.
                                    IRingSet ringSet = (IRingSet)ringSystems[i];
                                    IAtomContainer ac = builder.NewAtomContainer();
                                    foreach (var container in RingSetManipulator.GetAllAtomContainers(ringSet))
                                    {
                                        ac.Add(container);
                                    }
                                    counterRings++;

                                    // Only connection is important
                                    for (int j = 0; j < ac.Atoms.Count; j++)
                                    {
                                        ac.Atoms[j].Symbol = "C";
                                    }

                                    string key;
                                    try
                                    {
                                        key = smilesGenerator.Create(builder.NewAtomContainer(ac));
                                    }
                                    catch (CDKException e)
                                    {
                                        Trace.TraceError(e.Message);
                                        return;
                                    }

                                    // Add returns false when the SMILES was already recorded.
                                    if (!seenRingSystems.Add(key))
                                    {
                                        continue;
                                    }

                                    counterUniqueRings++;
                                    try
                                    {
                                        mdlw.Write(builder.NewAtomContainer(ac));
                                    }
                                    catch (Exception emdl) when (emdl is ArgumentException || emdl is CDKException)
                                    {
                                        // Previously swallowed silently; still non-fatal, but now visible.
                                        Trace.TraceError($"Could not write ring system {key}: {emdl.Message}");
                                    }
                                }
                            }
                        }
                    }
                    catch (Exception exc)
                    {
                        Console.Out.WriteLine($"Could not read Molecules from file {dataFile} due to: {exc.Message}");
                    }
                }
            }
            catch (Exception ex2)
            {
                Console.Out.WriteLine($"IOError:cannot write file due to: {ex2.ToString()}");
            }
            Console.Out.WriteLine($"READY Molecules:{counterMolecules} RingSystems:{counterRings} UniqueRingsSystem:{counterUniqueRings}");
            Console.Out.WriteLine($"HashtableKeys:{seenRingSystems.Count}");
        }
Beispiel #14
0
        /// <summary>
        /// Trains a <c>Bayesian</c> model on the molecules of an embedded SD resource,
        /// validates it, and compares the outcome with a serialised reference model.
        /// </summary>
        /// <param name="sdfile">Name of the training resource below <c>NCDK.Data.CDD.</c>.</param>
        /// <param name="actvField">Molecule property holding the activity: "true", "false", or an integer that must be 0 or 1.</param>
        /// <param name="classType">Fingerprint class handed to the model.</param>
        /// <param name="folding">Folding size handed to the model.</param>
        /// <param name="xval">3 selects three-fold validation, 5 five-fold; any other value leave-one-out.</param>
        /// <param name="modelFN">Name of the reference model resource below <c>NCDK.Data.CDD.</c>.</param>
        /// <param name="perceiveStereo">Value assigned to the model's <c>PerceiveStereo</c> property.</param>
        /// <exception cref="CDKException">
        /// An activity value is invalid, the model differs from the reference, or any other
        /// exception occurred (wrapped with the message "Test failed").
        /// </exception>
        private void RunTest(string sdfile, string actvField, CircularFingerprinterClass classType, int folding, int xval, string modelFN, bool perceiveStereo)
        {
            WriteLine("[" + modelFN + "]");
            WriteLine("    Loading " + sdfile);

            try
            {
                var model = new Bayesian(classType, folding);
                model.PerceiveStereo = perceiveStereo;

                int rowCount    = 0;
                int activeCount = 0;
                using (var sdfReader = new EnumerableSDFReader(ResourceLoader.GetAsStream($"NCDK.Data.CDD.{sdfile}"), ChemObjectBuilder.Instance))
                {
                    foreach (var molecule in sdfReader)
                    {
                        rowCount++;

                        var activityText = (string)molecule.GetProperties()[actvField];

                        // "true"/"false" map to 1/0; anything else must parse as an integer.
                        int activity;
                        if (activityText.Equals("true", StringComparison.Ordinal))
                        {
                            activity = 1;
                        }
                        else if (activityText.Equals("false", StringComparison.Ordinal))
                        {
                            activity = 0;
                        }
                        else
                        {
                            activity = int.Parse(activityText, NumberFormatInfo.InvariantInfo);
                        }

                        if (!(activity == 0 || activity == 1))
                        {
                            throw new CDKException("Activity field not found or invalid");
                        }

                        model.AddMolecule(molecule, activity == 1);
                        activeCount += activity;
                    }
                }

                WriteLine($"    Training with {rowCount} rows, {activeCount} actives, {(rowCount - activeCount)} inactives");

                model.Build();
                switch (xval)
                {
                    case 3:
                        model.ValidateThreeFold();
                        break;
                    case 5:
                        model.ValidateFiveFold();
                        break;
                    default:
                        model.ValidateLeaveOneOut();
                        break;
                }

                WriteLine($"    Validation: ROC AUC={model.RocAUC}");
                WriteLine($"    Parsing reference model");

                Bayesian reference;
                using (var referenceReader = new StreamReader(ResourceLoader.GetAsStream($"NCDK.Data.CDD.{modelFN}")))
                {
                    reference = Bayesian.Deserialise(referenceReader);
                }

                // Compare the details; every difference is reported before the test fails.
                const double thresholdTolerance = 1e-14;
                bool mismatch = false;

                if (model.Folding != reference.Folding)
                {
                    WriteLine($"    ** reference folding size={reference.Folding}");
                    mismatch = true;
                }
                if (model.TrainingSize != reference.TrainingSize)
                {
                    WriteLine($"    ** reference training size={reference.TrainingSize}");
                    mismatch = true;
                }
                if (model.TrainingActives != reference.TrainingActives)
                {
                    WriteLine($"    ** reference training actives={reference.TrainingActives}");
                    mismatch = true;
                }
                if (model.RocType != reference.RocType)
                {
                    WriteLine($"    ** reference ROC type={reference.RocType}");
                    mismatch = true;
                }
                if (!DblEqual(model.RocAUC, reference.RocAUC))
                {
                    WriteLine($"    ** reference ROC AUC={reference.RocAUC}");
                    mismatch = true;
                }
                if (Math.Abs(model.LowThreshold - reference.LowThreshold) > thresholdTolerance)
                {
                    WriteLine($"    ** reference lowThresh={reference.LowThreshold} different to calculated {model.LowThreshold}");
                    mismatch = true;
                }
                if (Math.Abs(model.HighThreshold - reference.HighThreshold) > thresholdTolerance)
                {
                    WriteLine($"    ** reference highThresh={reference.HighThreshold} different to calculated {model.HighThreshold}");
                    mismatch = true;
                }

                // Individual hash bit contributions must match as well.
                var modelBits     = model.Contributions;
                var referenceBits = reference.Contributions;
                if (modelBits.Count != referenceBits.Count)
                {
                    WriteLine($"    ** model has {modelBits.Count} contribution bits, reference has {referenceBits.Count}");
                    mismatch = true;
                }

                // Each of the three scans reports only its first offender.
                foreach (var hash in modelBits.Keys)
                {
                    if (referenceBits.ContainsKey(hash))
                    {
                        continue;
                    }
                    WriteLine($"    ** model hash bit {hash} not found in reference");
                    mismatch = true;
                    break;
                }
                foreach (var hash in referenceBits.Keys)
                {
                    if (modelBits.ContainsKey(hash))
                    {
                        continue;
                    }
                    WriteLine($"    ** reference hash bit {hash} not found in model");
                    mismatch = true;
                    break;
                }
                foreach (var hash in modelBits.Keys)
                {
                    if (!referenceBits.ContainsKey(hash))
                    {
                        continue;
                    }
                    double modelValue     = modelBits[hash];
                    double referenceValue = referenceBits[hash];
                    if (DblEqual(modelValue, referenceValue))
                    {
                        continue;
                    }
                    WriteLine($"    ** contribution for bit {hash}: model={modelValue}, reference={referenceValue}");
                    mismatch = true;
                    break;
                }

                if (mismatch)
                {
                    throw new CDKException("Comparison to reference failed");
                }
            }
            catch (Exception ex) when (!(ex is CDKException))
            {
                // CDKException passes through untouched; everything else is wrapped.
                throw new CDKException("Test failed", ex);
            }
        }
Beispiel #15
0
        // Trains a Bayesian model from every molecule in the named SD file resource, runs the
        // leave-one-out validation, then scores each molecule with the scaled predictor and checks
        // the resulting confusion matrix against the expected counts.
        // Throws CDKException when reading fails or when any of the four counts differs.
        private static void ConfirmPredictions(string sdfile, int truePos, int trueNeg, int falsePos, int falseNeg)
        {
            WriteLine($"[{sdfile}] comparing confusion matrix");

            var structures = new List<IAtomContainer>();
            var labels     = new List<bool>();
            var bayes      = new Bayesian(CircularFingerprinterClass.ECFP6, 1024);

            try
            {
                using (var stream = ResourceLoader.GetAsStream("NCDK.Data.CDD." + sdfile))
                {
                    foreach (var entry in new EnumerableSDFReader(stream, ChemObjectBuilder.Instance))
                    {
                        // the "Active" property carries the class label as text
                        var  activeField = (string)entry.GetProperties()["Active"];
                        bool isActive    = "true" == activeField;

                        structures.Add(entry);
                        labels.Add(isActive);
                        bayes.AddMolecule(entry, isActive);
                    }
                }
            }
            catch (Exception ex) when (!(ex is CDKException))
            {
                // CDKExceptions pass through untouched; anything else is wrapped
                throw new CDKException("Test failed", ex);
            }

            bayes.Build();
            bayes.ValidateLeaveOneOut();

            // tally the confusion matrix, treating a scaled prediction of 0.5 or more as "active"
            int tp = 0, tn = 0, fp = 0, fn = 0;

            for (int i = 0; i < structures.Count; i++)
            {
                double score           = bayes.ScalePredictor(bayes.Predict(structures[i]));
                bool   predictedActive = score >= 0.5;
                bool   reallyActive    = labels[i];

                if (predictedActive && reallyActive)
                {
                    tp++;
                }
                else if (predictedActive)
                {
                    fp++;
                }
                else if (reallyActive)
                {
                    fn++;
                }
                else
                {
                    tn++;
                }
            }

            WriteLine($"    True Positives:  got={tp} require={truePos}");
            WriteLine($"         Negatives:  got={tn} require={trueNeg}");
            WriteLine($"    False Positives: got={fp} require={falsePos}");
            WriteLine($"          Negatives: got={fn} require={falseNeg}");

            bool allMatch = tp == truePos && tn == trueNeg && fp == falsePos && fn == falseNeg;
            if (!allMatch)
            {
                throw new CDKException("Confusion matrix mismatch");
            }
        }
Beispiel #16
0
        /// <summary>
        /// Debugging harness: reads the molecules in a hard-coded local molfile and pushes each one
        /// through the <see cref="CircularFingerprinter"/> entry points (count fingerprint, the
        /// individual fingerprints, and the folded bit fingerprint).
        /// </summary>
        /// <remarks>
        /// Nothing is returned or asserted. The computed values are kept in locals, presumably so
        /// they can be inspected in a debugger.
        /// </remarks>
        public static void BuildTest()
        {
            // ECFP4 is a debug override; the value first assigned here was ECFP6.
            CircularFingerprinterClass fpClass = CircularFingerprinterClass.ECFP4; // FP diameter
            int fpLen = 2048;                                                      // folded binary fp length

            var cfp = new CircularFingerprinter(fpClass, fpLen);

            // using blocks release the file handle and the SDF reader even when parsing or
            // fingerprinting throws part-way through
            using (var reader = new StreamReader(@"C:\Download\CorpId-12345.mol"))
            using (var rdr = new EnumerableSDFReader(reader.BaseStream, ChemObjectBuilder.Instance))
            {
                rdr.ReaderMode = ChemObjectReaderMode.Relaxed;

                // foreach also disposes the enumerator, unlike a hand-driven MoveNext loop
                foreach (IAtomContainer entry in rdr)
                {
                    IAtomContainer mol = CdkMol.GetLargestMoleculeFragment(entry);

                    ICountFingerprint cfp1 = cfp.GetCountFingerprint(mol);  // get hash values and counts for each

                    cfp.Calculate(mol);
                    int fpCount = cfp.FPCount;
                    for (int fpi = 0; fpi < fpCount; fpi++)
                    {
                        CircularFingerprint cfp2 = cfp.GetFP(fpi);          // gets hash, iteration and lists of atoms (dups appear multiple times)
                    }

                    IBitFingerprint bfp    = cfp.GetBitFingerprint(mol);
                    BitArray        bs     = bfp.AsBitSet();
                    int             bsCard = bfp.Cardinality;
                    long            bsSize = bfp.Length;
                }
            }
        }