예제 #1
0
        // Trains a Bayesian model on every molecule found in the given SD file resource, then
        // runs each of those molecules back through the model and tallies the scaled
        // predictions into a confusion matrix that must match the expected counts.
        //
        //   sdfile                                - resource name, appended to the "NCDK.Data.CDD." prefix
        //   truePos, trueNeg, falsePos, falseNeg  - the confusion matrix cells that are required
        //
        // Throws CDKException when any tallied cell differs from its required value. Any
        // non-CDK exception raised while the molecules are being read and added to the model
        // is wrapped in a CDKException carrying the message "Test failed".
        private static void ConfirmPredictions(string sdfile, int truePos, int trueNeg, int falsePos, int falseNeg)
        {
            WriteLine($"[{sdfile}] comparing confusion matrix");

            var trainingSet = new List<IAtomContainer>();
            var labels = new List<bool>();
            var bayes = new Bayesian(CircularFingerprinterClass.ECFP6, 1024);

            try
            {
                using (var stream = ResourceLoader.GetAsStream("NCDK.Data.CDD." + sdfile))
                {
                    foreach (var entry in new EnumerableSDFReader(stream, ChemObjectBuilder.Instance))
                    {
                        // a molecule counts as active only when its "Active" property is exactly "true"
                        var activeField = (string)entry.GetProperties()["Active"];
                        bool isActive = activeField == "true";

                        trainingSet.Add(entry);
                        labels.Add(isActive);
                        bayes.AddMolecule(entry, isActive);
                    }
                }
            }
            catch (CDKException)
            {
                // already the exception type callers expect: let it through untouched
                throw;
            }
            catch (Exception ex)
            {
                throw new CDKException("Test failed", ex);
            }

            bayes.Build();
            bayes.ValidateLeaveOneOut();

            // tally the confusion matrix over the same molecules the model was trained on
            int tallyTP = 0;
            int tallyTN = 0;
            int tallyFP = 0;
            int tallyFN = 0;

            for (int idx = 0; idx < trainingSet.Count; idx++)
            {
                var rawScore = bayes.Predict(trainingSet[idx]);
                double scaled = bayes.ScalePredictor(rawScore);

                // scaled predictions of 0.5 or above are treated as a call of "active"
                bool calledActive = scaled >= 0.5;
                bool reallyActive = labels[idx];

                if (calledActive && reallyActive)
                {
                    tallyTP++;
                }
                else if (calledActive)
                {
                    tallyFP++;
                }
                else if (reallyActive)
                {
                    tallyFN++;
                }
                else
                {
                    tallyTN++;
                }
            }

            WriteLine("    True Positives:  got=" + tallyTP + " require=" + truePos);
            WriteLine("         Negatives:  got=" + tallyTN + " require=" + trueNeg);
            WriteLine("    False Positives: got=" + tallyFP + " require=" + falsePos);
            WriteLine("          Negatives: got=" + tallyFN + " require=" + falseNeg);

            bool matrixMatches = tallyTP == truePos
                              && tallyTN == trueNeg
                              && tallyFP == falsePos
                              && tallyFN == falseNeg;

            if (!matrixMatches)
            {
                throw new CDKException("Confusion matrix mismatch");
            }
        }