/// <summary>
/// Evaluates the test results and builds an averaged classification report.
/// </summary>
/// <param name="testResults">Set of test results.</param>
/// <param name="_testName">Descriptive name to be attached to the results report.</param>
/// <param name="logger">The logger, used to record any problems that occurred.</param>
/// <param name="averagingMethod">The averaging method.</param>
/// <returns>Classification report with metrics aggregated by the given averaging method.</returns>
public classificationReport EvaluateTestResults(List<FeatureVectorWithLabelID> testResults, String _testName, ILogBuilder logger, classificationMetricComputation averagingMethod = classificationMetricComputation.macroAveraging)
{
    classificationReport report = new classificationReport(_testName);

    // compute per-class metrics, then aggregate them into the report
    classificationEvalMetricSet metric = EvaluateTestResultsToMetricSet(testResults, _testName, logger);
    report.GetSetMetrics(metric);
    report.AddValues(metric, averagingMethod);

    return report;
}
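// --- usage sketch (illustrative only, not part of the original source) ---------------------
// Assuming an execution-context instance that exposes the overloads above, and hypothetical
// locals `foldResults` (List<FeatureVectorWithLabelID>) and `log` (ILogBuilder), one fold
// could be evaluated like this; the averaging method defaults to macro-averaging as declared:
//
//     classificationReport report = context.EvaluateTestResults(foldResults, "fold-1", log);
//     report.ReportToLog(log);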
public classificationReport EvaluateTestResults(List<FeatureVectorWithLabelID> testResults, String _testName, ILogBuilder logger)
{
    // delegate the evaluation to the truth table, which holds the label definitions
    classificationReport cReport = truthTable.EvaluateTestResults(testResults, _testName, notes);
    return cReport;
}
public IPlaneContext ExecutePlaneMethod(IPlaneContext inputContext, ExperimentModelExecutionContext generalContext, ILogBuilder logger)
{
    IEntityPlaneContext entityInputContext = inputContext as IEntityPlaneContext;  // note: currently unused

    // execute the plane methods in sequence: entity -> corpus -> vector -> feature
    ICorpusPlaneContext corpusContext = EntityMethod.ExecutePlaneMethod(inputContext, generalContext, logger) as ICorpusPlaneContext;
    IVectorPlaneContext vectorContext = CorpusMethod.ExecutePlaneMethod(corpusContext, generalContext, logger) as IVectorPlaneContext;
    IFeaturePlaneContext featureInputContext = VectorMethod.ExecutePlaneMethod(vectorContext, generalContext, logger) as IFeaturePlaneContext;
    IFeaturePlaneContext featureContext = FeatureMethod.ExecutePlaneMethod(featureInputContext, generalContext, logger) as IFeaturePlaneContext;

    // --- results reporting

    var evaluationMetrics = generalContext.truthTable.EvaluateTestResultsToMetricSet(featureContext.testResults, generalContext.runName + "-" + notes.folder.name, logger);

    // per-class evaluation table
    DataTableTypeExtended<classificationEval> inclassEvalTable = new DataTableTypeExtended<classificationEval>("inclass_evaluation", "Test results, per class");
    evaluationMetrics.GetAllEntries().ForEach(x => inclassEvalTable.AddRow(x));
    inclassEvalTable.AddRow(evaluationMetrics.GetSummary("Sum"));
    notes.SaveDataTable(inclassEvalTable, notes.folder_classification);

    // averaged report, serialized next to the per-class table
    classificationReport averagedReport = new classificationReport(evaluationMetrics, generalContext.averagingMethod);
    averagedReport.Classifier = FeatureMethod.classifier.name;
    averagedReport.saveObjectToXML(notes.folder_classification.pathFor(averagedReport.Name + ".xml", imbSCI.Data.enums.getWritableFileMode.overwrite, "Serialized classification evaluation results summary"));

    generalContext.testSummaries.Add(averagedReport);
    averagedReport.ReportToLog(notes);

    featureContext.provider.Dispose();
    EntityMethod.CacheProvider.Dispose();

    return generalContext;
}
public static void SetReportDataFields(this classificationReport report, CrossValidationModel crossValidationModel, ExperimentModelExecutionContext mainContext)
{
    report.data.Add(nameof(ReportDataFieldEnum.DataSetName), mainContext.dataset.name, "Name of dataset used in the experiment");
    report.data.Add(nameof(ReportDataFieldEnum.ValidationK), crossValidationModel.GetShortSignature(), "Cross validation model signature");

    // sum test instance counts over all fold summaries
    double testPerFold = 0;
    double trainingPerFold = 0;
    Int32 c = 0;

    foreach (var frep in mainContext.testSummaries)
    {
        testPerFold += frep.Targets;
        c++;
    }

    // assuming standard k-fold splitting (each instance tested in exactly one fold), the sum over
    // folds equals the dataset size: average test size per fold is total / folds, training size is the remainder
    trainingPerFold = testPerFold;
    testPerFold = testPerFold.GetRatio(c);
    trainingPerFold = trainingPerFold - testPerFold;

    report.data.Add(nameof(ReportDataFieldEnum.TestSetCount), testPerFold.ToString("F2"), "Average number of test instances per fold");
    report.data.Add(nameof(ReportDataFieldEnum.TrainingSetCount), trainingPerFold.ToString("F2"), "Average number of training instances per fold");
}
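// --- worked example (illustrative only, not part of the original source) -------------------
// For a 1,000-instance dataset under 5-fold cross validation, the loop above sums
// 5 x 200 = 1,000 test instances across the fold summaries, so
// testPerFold = 1000 / 5 = 200 and trainingPerFold = 1000 - 200 = 800.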
public static void SetReportDataFields(this classificationReport report, OperationContext context, Boolean afterFeatureSelection = false)
{
    if (!afterFeatureSelection)
    {
        report.data.Add(nameof(ReportDataFieldEnum.labeled_terms), context.spaceModel.terms_known_label.Count.ToString(), "Number of labeled input terms");
        report.data.Add(nameof(ReportDataFieldEnum.unlabeled_terms), context.spaceModel.terms_unknown_label.Count.ToString(), "Number of unlabeled input terms");
    }
    else
    {
        report.data.Add(nameof(ReportDataFieldEnum.labeled_selected_terms), context.spaceModel.terms_known_label.Count.ToString(), "Number of labeled selected terms");
        report.data.Add(nameof(ReportDataFieldEnum.unlabeled_selected_terms), context.spaceModel.terms_unknown_label.Count.ToString(), "Number of unlabeled selected terms");
        report.data.Add(nameof(ReportDataFieldEnum.SelectedFeatures), context.SelectedFeatures.Count.ToString(), "Number of selected features");

        // record the smallest weight among the selected features
        rangeFinder ranger = new rangeFinder();
        foreach (var pair in context.SelectedFeatures.index)
        {
            ranger.Learn(pair.Value.weight);
        }

        report.data.Add(nameof(ReportDataFieldEnum.SelectedFeatureMin), ranger.Minimum.ToString("F5"), "Smallest weight of a selected feature");
    }
}
public static void SetReportDataFields(this classificationReport report, DatasetStructureReport context)
{
    report.data.Add(nameof(ReportDataFieldEnum.PagePerSite), context.pagesPerSite.ToString("F2"), "Pages per web site instance");
}
public static void SetReportDataFields(this classificationReport report, IClassifier classifier, FeatureFilter filter, FeatureWeightModel featureWeight)
{
    report.data.Add(nameof(ReportDataFieldEnum.Classifier), classifier.GetSignature(), "Signature of the classification algorithm");
    report.data.Add(nameof(ReportDataFieldEnum.FeatureSelection), filter.GetSignature(), "Signature of feature selection filter model");
    report.data.Add(nameof(ReportDataFieldEnum.FeatureWeighting), featureWeight.GetSignature(), "Signature of feature weight model");
}
public static void SetReportDataFields(this classificationReport report, WeightDictionary selected)
{
    report.data.Add(nameof(ReportDataFieldEnum.SelectedFeatures), selected.Count.ToString(), "Number of selected features");
}
/// <summary>
/// Generates the final reports and readme files.
/// </summary>
public void CloseExperiment(ILogBuilder logger, long startOfLog)
{
    if (!testSummaries.Any())
    {
        logger.log("No experiment procedures performed");
        return;
    }

    DataTableTypeExtended<classificationReport> summaryTable = new DataTableTypeExtended<classificationReport>("Test results", "k-fold cross validation results");

    // aggregate all fold reports into a single summary row
    classificationReport sumRow = new classificationReport(runName);
    sumRow.Comment = runName + ", " + description;

    foreach (classificationReport s in testSummaries)
    {
        summaryTable.AddRow(s);

        if (sumRow.Classifier.isNullOrEmpty())
        {
            sumRow.Classifier = s.Classifier;
        }

        sumRow.AddValues(s);
    }

    sumRow.DivideValues(testSummaries.Count);
    sumRow.SetReportDataFields(crossValidation, this);

    summaryTable.SetDescription(description);
    summaryTable.SetAdditionalInfoEntry("RunName", runName);
    summaryTable.SetAdditionalInfoEntry("Description", description);
    summaryTable.SetAdditionalInfoEntry("Averaging", averagingMethod.ToString());
    summaryTable.AddRow(sumRow);
    summaryTable.GetReportAndSave(notes.folder, signature);

    finalReport = sumRow;

    sumRow.ReportToLog(notes);

    objectSerialization.saveObjectToXML(sumRow, notes.folder.pathFor("results.xml", imbSCI.Data.enums.getWritableFileMode.overwrite, "Experiment results", true));

    logger.log("Experiment completed");
    notes.SaveNote("note");

    // persist the log content produced during the experiment run
    String logPrintout = logger.GetContent(startOfLog);
    String p = notes.folder.pathFor("log.txt", imbSCI.Data.enums.getWritableFileMode.overwrite, "Log printout during experiment execution");
    File.WriteAllText(p, logPrintout);

    experimentRootFolder.generateReadmeFiles(signature);
}
public override ExperimentDataSetFoldContextPair<OperationContext> Execute(ILogBuilder logger, OperationContext executionContextMain = null, ExperimentModelExecutionContext executionContextExtra = null)
{
    ExperimentDataSetFoldContextPair<OperationContext> output = new ExperimentDataSetFoldContextPair<OperationContext>(fold, executionContextMain);

    Open();

    // optional document selection, based on precompiled scores
    if (!setup.documentSelectQuery.PrecompiledScoresFilename.Trim().isNullOrEmpty())
    {
        String precompFile = DocumentSelectResult.CheckAndMakeFilename(setup.documentSelectQuery.PrecompiledScoresFilename);
        var p = executionContextExtra.resourceProvider.GetResourceFile(precompFile, fold);

        DocumentSelectResult scores = DocumentSelectResult.LoadFromFile(p, logger);
        if (scores != null)
        {
            scores.SaveReport(fold_notes.folder.pathFor("DSScores_loaded.txt", imbSCI.Data.enums.getWritableFileMode.overwrite));

            scores = setup.documentSelectQuery.ExecuteLimit(scores, logger);
            IEnumerable<string> assignedIDs = scores.items.Select(x => x.AssignedID);

            scores.SaveReport(fold_notes.folder.pathFor("DSScores_applied.txt", imbSCI.Data.enums.getWritableFileMode.overwrite));

            fold.DataSetSubSet(assignedIDs.ToList(), true, true);
        }
        else
        {
            // log before throwing, so the failure is recorded (the original logged after the throw, which was unreachable)
            logger.log(" _ DocumentSelect failed for [" + name + "]");
            throw new ArgumentException("DSelection file failed: " + setup.documentSelectQuery.PrecompiledScoresFilename);
        }
    }

    // dataset structure report for this fold
    classificationReport tmpReport = new classificationReport();
    String dsReportName = fold.name + setup.documentSelectQuery.PrecompiledScoresFilename + setup.documentSelectQuery.SizeLimit;
    DatasetStructureReport dsReport = DatasetStructureReport.MakeStructureReport(fold, dsReportName);
    dsReport.Publish(fold_notes.folder, true, true);
    tmpReport.SetReportDataFields(dsReport);

    if (!output.context.IsDatasetDeployed)
    {
        output.context.DeployDataSet(fold, logger);

        entityOperation.TextRendering(output.context, notes, requirements.MayUseTextRender);
        corpusOperation.SpaceModelPopulation(output.context, notes);

        if (requirements.MayUseSpaceModelCategories)
        {
            corpusOperation.SpaceModelCategories(output.context, notes);
        }
    }

    tmpReport.SetReportDataFields(output.context, false);

    // feature selection and vector space construction
    corpusOperation.FeatureSelection(output.context, notes);
    corpusOperation.VectorSpaceConstruction(output.context, notes, requirements.MayUseVectorSpaceCategories);
    corpusOperation.FeatureVectorConstruction(output.context, notes);

    if (setup.reportOptions.HasFlag(OperationReportEnum.randomSampledDemo))
    {
        logger.log("-- generating random sample report");

        var data_wm = imbNLP.Toolkit.Reporting.ReportGenerators.MakeWeightModelDemoTable(output.context.spaceModel, corpusOperation.weightModel, output.context.SelectedFeatures, 5, "DemoForWeightModel", "Diagnostic report for picked sample");
        data_wm.GetReportAndSave(fold_notes.folder);

        var data_fs = imbNLP.Toolkit.Reporting.ReportGenerators.MakeWeightModelDemoTable(output.context.spaceModel, corpusOperation.filter.WeightModel, output.context.SelectedFeatures, 5, "DemoForFeatureSelection", "Diagnostic report for feature selection filter sample");
        data_fs.GetReportAndSave(fold_notes.folder);
    }

    // classification and diagnostics
    classificationOperation.PerformClassification(output.context, executionContextExtra.truthTable, setup.dataSetMode, notes);

    corpusOperation.weightModel.DiagnosticDump(fold_notes.folder, logger);
    classificationEvalMetricSet evaluationMetrics = executionContextExtra.truthTable.EvaluateTestResultsToMetricSet(output.context.testResults, setup.OutputFilename + "-" + notes.folder.name, logger);

    // optionally export the evaluation as a document selection result
    if (setup.ExportEvaluationAsDocumentSelectionResult)
    {
        Toolkit.Feature.FeatureVectorDictionaryWithDimensions dict = executionContextExtra.truthTable.GetEvaluationAsFeatureVectorDictionary(output.context.testResults, setup.OutputFilename, logger, setup.ExportEvaluationCorrectScore, setup.ExportEvaluationIncorrectScore);

        String out_ds = setup.ExportEvaluationToFilename.Replace("*", "");
        dict.Save(fold_notes.folder, out_ds.or(setup.OutputFilename), logger);
        dict.Save(notes.folder, out_ds.or(setup.OutputFilename), logger);
    }

    // per-class evaluation table
    DataTableTypeExtended<classificationEval> inclassEvalTable = new DataTableTypeExtended<classificationEval>("inclass_evaluation", "Test results, per class");
    evaluationMetrics.GetAllEntries().ForEach(x => inclassEvalTable.AddRow(x));
    inclassEvalTable.AddRow(evaluationMetrics.GetSummary("Sum"));
    notes.SaveDataTable(inclassEvalTable, notes.folder_classification);

    // averaged report, serialized and added to the experiment-level summaries
    classificationReport averagedReport = new classificationReport(evaluationMetrics, setup.averagingMethod);
    averagedReport.Classifier = classificationOperation.classifier.GetSignature();
    averagedReport.saveObjectToXML(notes.folder_classification.pathFor(averagedReport.Name + ".xml", imbSCI.Data.enums.getWritableFileMode.overwrite, "Serialized classification evaluation results summary"));
    averagedReport.ReportToLog(notes);
    averagedReport.SetReportDataFields(output.context, true);
    averagedReport.data.Merge(tmpReport.data);
    averagedReport.SetReportDataFields(classificationOperation.classifier, corpusOperation.filter, corpusOperation.weightModel);

    executionContextExtra.testSummaries.Add(averagedReport);

    OperationContextReport reportOperation = new OperationContextReport();
    reportOperation.DeploySettingsBase(notes);
    reportOperation.GenerateReports(output.context, setup.reportOptions, notes);

    Close();

    return output;
}