/// <summary>
/// Trains a <c>Bayesian</c> model from the molecules of an SD file, runs the requested
/// cross-validation, and compares the outcome against a serialised reference model.
/// </summary>
/// <param name="sdfile">SD file name; opened through <c>ResourceLoader.GetAsStream</c> with the <c>NCDK.Data.CDD.</c> prefix.</param>
/// <param name="actvField">Molecule property holding the activity: "true", "false", or an integer that must be 0 or 1.</param>
/// <param name="classType">Fingerprint class passed to the <c>Bayesian</c> constructor.</param>
/// <param name="folding">Folding size passed to the <c>Bayesian</c> constructor.</param>
/// <param name="xval">3 selects three-fold validation, 5 selects five-fold; any other value selects leave-one-out.</param>
/// <param name="modelFN">Reference model file name; opened the same way as <paramref name="sdfile"/>.</param>
/// <param name="perceiveStereo">Value assigned to the model's <c>PerceiveStereo</c> property.</param>
/// <exception cref="CDKException">
/// Thrown when an activity value is missing or invalid, when the model differs from the reference,
/// or (with message "Test failed") when any other exception occurs.
/// </exception>
private void RunTest(string sdfile, string actvField, CircularFingerprinterClass classType, int folding, int xval, string modelFN, bool perceiveStereo)
{
    WriteLine("[" + modelFN + "]");
    WriteLine(" Loading " + sdfile);

    try
    {
        var model = new Bayesian(classType, folding) { PerceiveStereo = perceiveStereo };

        int row = 0, numActives = 0;
        using (var rdr = new EnumerableSDFReader(ResourceLoader.GetAsStream($"NCDK.Data.CDD.{sdfile}"), ChemObjectBuilder.Instance))
        {
            foreach (var mol in rdr)
            {
                row++;

                // A null or non-string value, or text that is not an integer, must produce the
                // dedicated diagnostic below rather than a NullReferenceException/FormatException
                // that the outer handler would relabel as a generic "Test failed".
                // NOTE(review): a key that is absent altogether may still throw from the indexer,
                // depending on the dictionary type returned by GetProperties() - confirm.
                var stractv = mol.GetProperties()[actvField] as string;
                int active;
                if (stractv == null)
                {
                    active = -1;
                }
                else if (stractv.Equals("true", StringComparison.Ordinal))
                {
                    active = 1;
                }
                else if (stractv.Equals("false", StringComparison.Ordinal))
                {
                    active = 0;
                }
                else if (!int.TryParse(stractv, NumberStyles.Integer, NumberFormatInfo.InvariantInfo, out active))
                {
                    active = -1;
                }

                if (active != 0 && active != 1)
                {
                    throw new CDKException("Activity field not found or invalid");
                }

                model.AddMolecule(mol, active == 1);
                numActives += active;
            }
        }

        WriteLine($" Training with {row} rows, {numActives} actives, {(row - numActives)} inactives");

        model.Build();
        if (xval == 3)
        {
            model.ValidateThreeFold();
        }
        else if (xval == 5)
        {
            model.ValidateFiveFold();
        }
        else
        {
            model.ValidateLeaveOneOut();
        }

        WriteLine($" Validation: ROC AUC={model.RocAUC}");

        WriteLine($" Parsing reference model");

        Bayesian reference;
        using (var mrdr = new StreamReader(ResourceLoader.GetAsStream($"NCDK.Data.CDD.{modelFN}")))
        {
            reference = Bayesian.Deserialise(mrdr);
        }

        // start comparing the details; every mismatch is reported before the test is failed
        bool failed = false;
        if (model.Folding != reference.Folding)
        {
            WriteLine($" ** reference folding size={reference.Folding}");
            failed = true;
        }
        if (model.TrainingSize != reference.TrainingSize)
        {
            WriteLine($" ** reference training size={reference.TrainingSize}");
            failed = true;
        }
        if (model.TrainingActives != reference.TrainingActives)
        {
            WriteLine($" ** reference training actives={reference.TrainingActives}");
            failed = true;
        }
        if (model.RocType != reference.RocType)
        {
            WriteLine($" ** reference ROC type={reference.RocType}");
            failed = true;
        }
        if (!DblEqual(model.RocAUC, reference.RocAUC))
        {
            WriteLine($" ** reference ROC AUC={reference.RocAUC}");
            failed = true;
        }
        if (Math.Abs(model.LowThreshold - reference.LowThreshold) > 0.00000000000001)
        {
            WriteLine($" ** reference lowThresh={reference.LowThreshold} different to calculated {model.LowThreshold}");
            failed = true;
        }
        if (Math.Abs(model.HighThreshold - reference.HighThreshold) > 0.00000000000001)
        {
            WriteLine($" ** reference highThresh={reference.HighThreshold} different to calculated {model.HighThreshold}");
            failed = true;
        }

        // make sure individual hash bit contributions match
        var mbits = model.Contributions;
        var rbits = reference.Contributions;
        if (mbits.Count != rbits.Count)
        {
            WriteLine($" ** model has {mbits.Count} contribution bits, reference has {rbits.Count}");
            failed = true;
        }

        foreach (var h in mbits.Keys)
        {
            if (!rbits.ContainsKey(h))
            {
                WriteLine($" ** model hash bit {h} not found in reference");
                failed = true;
                break; // one is enough
            }
        }

        foreach (var h in rbits.Keys)
        {
            if (!mbits.ContainsKey(h))
            {
                WriteLine($" ** reference hash bit {h} not found in model");
                failed = true;
                break; // one is enough
            }
        }

        // only bits present on both sides can have their contribution values compared
        foreach (var h in mbits.Keys)
        {
            if (rbits.ContainsKey(h))
            {
                double c1 = mbits[h], c2 = rbits[h];
                if (!DblEqual(c1, c2))
                {
                    WriteLine($" ** contribution for bit {h}: model={c1}, reference={c2}");
                    failed = true;
                    break; // one is enough
                }
            }
        }

        if (failed)
        {
            throw new CDKException("Comparison to reference failed");
        }
    }
    catch (CDKException)
    {
        // already carries a specific message; let it through untouched
        throw;
    }
    catch (Exception ex)
    {
        throw new CDKException("Test failed", ex);
    }
}
/// <summary>
/// Builds a model and uses the scaled predictions to rack up a confusion matrix, which is then
/// compared with the expected counts.
/// </summary>
/// <param name="sdfile">SD file name; opened through <c>ResourceLoader.GetAsStream</c> with the <c>NCDK.Data.CDD.</c> prefix.</param>
/// <param name="truePos">Required number of actives with a scaled prediction of at least 0.5.</param>
/// <param name="trueNeg">Required number of inactives with a scaled prediction below 0.5.</param>
/// <param name="falsePos">Required number of inactives with a scaled prediction of at least 0.5.</param>
/// <param name="falseNeg">Required number of actives with a scaled prediction below 0.5.</param>
/// <exception cref="CDKException">
/// Thrown when any count differs from its required value, or (with message "Test failed")
/// when reading the molecules raises a non-CDK exception.
/// </exception>
private static void ConfirmPredictions(string sdfile, int truePos, int trueNeg, int falsePos, int falseNeg)
{
    WriteLine($"[{sdfile}] comparing confusion matrix");

    var molecules = new List<IAtomContainer>();
    var activities = new List<bool>();
    var model = new Bayesian(CircularFingerprinterClass.ECFP6, 1024);

    try
    {
        // Dispose the reader as well as the stream, matching how RunTest handles the same reader type.
        using (var ins = ResourceLoader.GetAsStream("NCDK.Data.CDD." + sdfile))
        using (var rdr = new EnumerableSDFReader(ins, ChemObjectBuilder.Instance))
        {
            foreach (var mol in rdr)
            {
                // only the exact text "true" counts as active; anything else (including null) is inactive
                bool actv = string.Equals("true", (string)mol.GetProperties()["Active"], StringComparison.Ordinal);
                molecules.Add(mol);
                activities.Add(actv);
                model.AddMolecule(mol, actv);
            }
        }
    }
    catch (CDKException)
    {
        throw;
    }
    catch (Exception ex)
    {
        throw new CDKException("Test failed", ex);
    }

    model.Build();
    model.ValidateLeaveOneOut();

    // build the confusion matrix
    int gotTP = 0, gotTN = 0, gotFP = 0, gotFN = 0;
    for (int n = 0; n < molecules.Count; n++)
    {
        double pred = model.ScalePredictor(model.Predict(molecules[n]));
        bool actv = activities[n];
        if (pred >= 0.5)
        {
            if (actv)
            {
                gotTP++;
            }
            else
            {
                gotFP++;
            }
        }
        else
        {
            if (actv)
            {
                gotFN++;
            }
            else
            {
                gotTN++;
            }
        }
    }

    WriteLine(" True Positives: got=" + gotTP + " require=" + truePos);
    WriteLine(" Negatives: got=" + gotTN + " require=" + trueNeg);
    WriteLine(" False Positives: got=" + gotFP + " require=" + falsePos);
    WriteLine(" Negatives: got=" + gotFN + " require=" + falseNeg);

    if (gotTP != truePos || gotTN != trueNeg || gotFP != falsePos || gotFN != falseNeg)
    {
        throw new CDKException("Confusion matrix mismatch");
    }
}