/// <summary>
/// Makes sure the auxiliary text fields (title, origin and comments) survive a
/// serialise/deserialise round trip.
/// </summary>
/// <remarks>
/// A model is created with known note values, serialised, and deserialised into a second
/// model. Both the original and the round-tripped model are then compared against the
/// values that were assigned.
/// </remarks>
/// <exception cref="CDKException">
/// Thrown when reserialisation fails with an <see cref="IOException"/>, or when the title,
/// origin or comments of either model differ from the values that were assigned.
/// </exception>
private void CheckTextFields()
{
    WriteLine("Checking integrity of text fields");

    var dummyTitle = "some title";
    var dummyOrigin = "some origin";
    var dummyComments = new string[] { "comment1", "comment2" };

    var model1 = new Bayesian(CircularFingerprinterClass.ECFP6)
    {
        NoteTitle = dummyTitle,
        NoteOrigin = dummyOrigin,
        NoteComments = dummyComments
    };

    Bayesian model2;
    try
    {
        model2 = Bayesian.Deserialise(model1.Serialise());
    }
    catch (IOException ex)
    {
        throw new CDKException("Reserialisation failed", ex);
    }

    // Each field gets its own check so the message names the field that actually failed.
    if (!dummyTitle.Equals(model1.NoteTitle, StringComparison.Ordinal)
        || !dummyTitle.Equals(model2.NoteTitle, StringComparison.Ordinal))
    {
        throw new CDKException("Note integrity failure for title");
    }

    if (!dummyOrigin.Equals(model1.NoteOrigin, StringComparison.Ordinal)
        || !dummyOrigin.Equals(model2.NoteOrigin, StringComparison.Ordinal))
    {
        throw new CDKException("Note integrity failure for origin");
    }

    var comments1 = model1.NoteComments;
    var comments2 = model2.NoteComments;
    if (comments1 == null || comments2 == null
        || comments1.Count != dummyComments.Length
        || comments2.Count != dummyComments.Length)
    {
        throw new CDKException("Note integrity failure for comments");
    }

    for (int i = 0; i < dummyComments.Length; i++)
    {
        // Equals is called on the known non-null expected value, so a null element
        // in either model is reported as a mismatch instead of throwing.
        if (!dummyComments[i].Equals(comments1[i], StringComparison.Ordinal)
            || !dummyComments[i].Equals(comments2[i], StringComparison.Ordinal))
        {
            throw new CDKException("Note integrity failure for comments");
        }
    }
}
/// <summary>
/// Builds a Bayesian model from an SD file resource, validates it, and compares the result
/// against a previously serialised reference model.
/// </summary>
/// <param name="sdfile">Name of the SD file resource, resolved under <c>NCDK.Data.CDD</c>.</param>
/// <param name="actvField">
/// Key of the molecule property holding the activity; its value must be <c>"true"</c>,
/// <c>"false"</c>, or the integer <c>0</c> or <c>1</c>.
/// </param>
/// <param name="classType">Fingerprint class passed to the <see cref="Bayesian"/> constructor.</param>
/// <param name="folding">Folding size passed to the <see cref="Bayesian"/> constructor.</param>
/// <param name="xval">
/// Validation mode: <c>3</c> selects three-fold, <c>5</c> selects five-fold, and any other
/// value selects leave-one-out validation.
/// </param>
/// <param name="modelFN">Name of the reference model resource, resolved under <c>NCDK.Data.CDD</c>.</param>
/// <param name="perceiveStereo">Value assigned to <c>PerceiveStereo</c> on the model being built.</param>
/// <exception cref="CDKException">
/// Thrown when an activity value is missing or invalid, when the built model differs from
/// the reference, or (wrapping the original exception) when any other failure occurs.
/// </exception>
private void RunTest(string sdfile, string actvField, CircularFingerprinterClass classType, int folding, int xval, string modelFN, bool perceiveStereo)
{
    WriteLine("[" + modelFN + "]");
    WriteLine(" Loading " + sdfile);

    try
    {
        var model = new Bayesian(classType, folding) { PerceiveStereo = perceiveStereo };

        int row = 0, numActives = 0;
        using (var rdr = new EnumerableSDFReader(ResourceLoader.GetAsStream($"NCDK.Data.CDD.{sdfile}"), ChemObjectBuilder.Instance))
        {
            foreach (var mol in rdr)
            {
                row++;

                // A null or non-string value is passed on as null and rejected by the parser
                // with the specific "Activity field not found or invalid" message.
                bool isActive = ParseActivityFlag(mol.GetProperties()[actvField] as string);
                model.AddMolecule(mol, isActive);
                if (isActive)
                {
                    numActives++;
                }
            }
        }

        WriteLine($" Training with {row} rows, {numActives} actives, {(row - numActives)} inactives");

        model.Build();
        if (xval == 3)
        {
            model.ValidateThreeFold();
        }
        else if (xval == 5)
        {
            model.ValidateFiveFold();
        }
        else
        {
            model.ValidateLeaveOneOut();
        }

        WriteLine($" Validation: ROC AUC={model.RocAUC}");
        WriteLine($" Parsing reference model");

        Bayesian reference;
        using (var mrdr = new StreamReader(ResourceLoader.GetAsStream($"NCDK.Data.CDD.{modelFN}")))
        {
            reference = Bayesian.Deserialise(mrdr);
        }

        if (!MatchesReference(model, reference))
        {
            throw new CDKException("Comparison to reference failed");
        }
    }
    catch (CDKException)
    {
        // Already carries a specific message; let it propagate unchanged.
        throw;
    }
    catch (Exception ex)
    {
        throw new CDKException("Test failed", ex);
    }
}

/// <summary>
/// Converts the text of an activity field into a flag: <c>"true"</c> or <c>1</c> is active,
/// <c>"false"</c> or <c>0</c> is inactive.
/// </summary>
/// <param name="text">Raw field value; may be <c>null</c> when the field is absent.</param>
/// <returns><c>true</c> for an active molecule, <c>false</c> for an inactive one.</returns>
/// <exception cref="CDKException">Thrown when the value is null or not one of the accepted forms.</exception>
private static bool ParseActivityFlag(string text)
{
    if (text != null)
    {
        if (text.Equals("true", StringComparison.Ordinal))
        {
            return true;
        }
        if (text.Equals("false", StringComparison.Ordinal))
        {
            return false;
        }

        // TryParse (rather than Parse) keeps malformed or overflowing numbers on the
        // same error path as out-of-range ones.
        int value;
        if (int.TryParse(text, NumberStyles.Integer, NumberFormatInfo.InvariantInfo, out value)
            && (value == 0 || value == 1))
        {
            return value == 1;
        }
    }

    throw new CDKException("Activity field not found or invalid");
}

/// <summary>
/// Compares a freshly built model with a reference model, writing one line per kind of
/// discrepancy found.
/// </summary>
/// <param name="model">The model that was just built and validated.</param>
/// <param name="reference">The deserialised reference model.</param>
/// <returns><c>true</c> when no discrepancy was found; otherwise <c>false</c>.</returns>
private bool MatchesReference(Bayesian model, Bayesian reference)
{
    bool failed = false;

    if (model.Folding != reference.Folding)
    {
        WriteLine($" ** reference folding size={reference.Folding}");
        failed = true;
    }
    if (model.TrainingSize != reference.TrainingSize)
    {
        WriteLine($" ** reference training size={reference.TrainingSize}");
        failed = true;
    }
    if (model.TrainingActives != reference.TrainingActives)
    {
        WriteLine($" ** reference training actives={reference.TrainingActives}");
        failed = true;
    }
    if (model.RocType != reference.RocType)
    {
        WriteLine($" ** reference ROC type={reference.RocType}");
        failed = true;
    }
    if (!DblEqual(model.RocAUC, reference.RocAUC))
    {
        WriteLine($" ** reference ROC AUC={reference.RocAUC}");
        failed = true;
    }

    // The thresholds use a fixed absolute tolerance rather than DblEqual.
    if (Math.Abs(model.LowThreshold - reference.LowThreshold) > 0.00000000000001)
    {
        WriteLine($" ** reference lowThresh={reference.LowThreshold} different to calculated {model.LowThreshold}");
        failed = true;
    }
    if (Math.Abs(model.HighThreshold - reference.HighThreshold) > 0.00000000000001)
    {
        WriteLine($" ** reference highThresh={reference.HighThreshold} different to calculated {model.HighThreshold}");
        failed = true;
    }

    // Make sure individual hash bit contributions match.
    var mbits = model.Contributions;
    var rbits = reference.Contributions;
    if (mbits.Count != rbits.Count)
    {
        WriteLine($" ** model has {mbits.Count} contribution bits, reference has {rbits.Count}");
        failed = true;
    }

    // Each of the three scans below reports only the first offending bit to keep the log short.
    foreach (var h in mbits.Keys)
    {
        if (!rbits.ContainsKey(h))
        {
            WriteLine($" ** model hash bit {h} not found in reference");
            failed = true;
            break;
        }
    }
    foreach (var h in rbits.Keys)
    {
        if (!mbits.ContainsKey(h))
        {
            WriteLine($" ** reference hash bit {h} not found in model");
            failed = true;
            break;
        }
    }
    foreach (var h in mbits.Keys)
    {
        if (rbits.ContainsKey(h))
        {
            double c1 = mbits[h], c2 = rbits[h];
            if (!DblEqual(c1, c2))
            {
                WriteLine($" ** contribution for bit {h}: model={c1}, reference={c2}");
                failed = true;
                break;
            }
        }
    }

    return !failed;
}