// Example #1 (score: 0)
        /// <summary>
        /// Evaluates the test results and builds a classification report using the given averaging method.
        /// </summary>
        /// <param name="testResults">Set of test results.</param>
        /// <param name="_testName">Descriptive name to be attached to the results report.</param>
        /// <param name="logger">The logger - to log any problems, if occurred.</param>
        /// <param name="averagingMethod">The averaging method used to aggregate per-class metrics.</param>
        /// <returns>Report populated with the computed metric set.</returns>
        public classificationReport EvaluateTestResults(List <FeatureVectorWithLabelID> testResults, String _testName, ILogBuilder logger, classificationMetricComputation averagingMethod = classificationMetricComputation.macroAveraging)
        {
            var resultReport = new classificationReport(_testName);

            // Compute the per-class metric set first, then fold it into the report.
            classificationEvalMetricSet metricSet = EvaluateTestResultsToMetricSet(testResults, _testName, logger);

            resultReport.GetSetMetrics(metricSet);
            resultReport.AddValues(metricSet, averagingMethod);

            return resultReport;
        }
        /// <summary>
        /// Evaluates the test results against the truth table and returns the resulting report.
        /// </summary>
        /// <param name="testResults">Set of test results.</param>
        /// <param name="_testName">Descriptive name to be attached to the results report.</param>
        /// <param name="logger">The logger - to log any problems, if occurred.</param>
        /// <returns>Classification report produced by the truth table evaluation.</returns>
        public classificationReport EvaluateTestResults(List <FeatureVectorWithLabelID> testResults, String _testName, ILogBuilder logger)
        {
            // Delegates directly to the truth table; `notes` supplies the reporting target.
            return truthTable.EvaluateTestResults(testResults, _testName, notes);
        }
// Example #3 (score: 0)
        /// <summary>
        /// Executes the full plane-method pipeline (entity → corpus → vector → feature),
        /// evaluates the classification results and attaches the reports to the general context.
        /// </summary>
        /// <param name="inputContext">Input context for the first (entity) plane.</param>
        /// <param name="generalContext">Cross-plane experiment execution context.</param>
        /// <param name="logger">The logger - to log any problems, if occurred.</param>
        /// <returns>The general context, updated with the averaged test summary.</returns>
        public IPlaneContext ExecutePlaneMethod(IPlaneContext inputContext, ExperimentModelExecutionContext generalContext, ILogBuilder logger)
        {
            // Each plane consumes the context produced by the previous one.
            // NOTE(review): the original named each local after the *producing* method, so the
            // name disagreed with the static type (e.g. `entityContext` was an ICorpusPlaneContext);
            // locals below are named after their type. An unused `inputContext as IEntityPlaneContext`
            // local was also removed.
            ICorpusPlaneContext corpusContext = EntityMethod.ExecutePlaneMethod(inputContext, generalContext, logger) as ICorpusPlaneContext;

            IVectorPlaneContext vectorContext = CorpusMethod.ExecutePlaneMethod(corpusContext, generalContext, logger) as IVectorPlaneContext;

            IFeaturePlaneContext featureInputContext = VectorMethod.ExecutePlaneMethod(vectorContext, generalContext, logger) as IFeaturePlaneContext;

            IFeaturePlaneContext featureContext = FeatureMethod.ExecutePlaneMethod(featureInputContext, generalContext, logger) as IFeaturePlaneContext;

            // --- the results reporting

            var evaluationMetrics = generalContext.truthTable.EvaluateTestResultsToMetricSet(featureContext.testResults, generalContext.runName + "-" + notes.folder.name, logger);

            // Per-class evaluation table plus a summary row, saved under the classification folder.
            DataTableTypeExtended <classificationEval> inclassEvalTable = new DataTableTypeExtended <classificationEval>("inclass_evaluation", "Test results, per class");

            evaluationMetrics.GetAllEntries().ForEach(x => inclassEvalTable.AddRow(x));
            inclassEvalTable.AddRow(evaluationMetrics.GetSummary("Sum"));
            notes.SaveDataTable(inclassEvalTable, notes.folder_classification);

            // Averaged report: serialized to XML, logged, and added to the experiment summaries.
            classificationReport averagedReport = new classificationReport(evaluationMetrics, generalContext.averagingMethod);

            averagedReport.Classifier = FeatureMethod.classifier.name;
            averagedReport.saveObjectToXML(notes.folder_classification.pathFor(averagedReport.Name + ".xml", imbSCI.Data.enums.getWritableFileMode.overwrite, "Serialized classification evaluation results summary"));

            generalContext.testSummaries.Add(averagedReport);

            averagedReport.ReportToLog(notes);

            // Release cached resources used during the run.
            featureContext.provider.Dispose();
            EntityMethod.CacheProvider.Dispose();

            return generalContext;
        }
// Example #4 (score: 0)
        /// <summary>
        /// Adds cross-validation data fields to the report: dataset name, validation signature,
        /// and the average test / training instance counts per fold.
        /// </summary>
        /// <param name="report">Report to be extended.</param>
        /// <param name="crossValidationModel">Cross validation model used in the experiment.</param>
        /// <param name="mainContext">Main experiment execution context, providing fold summaries.</param>
        public static void SetReportDataFields(this classificationReport report, CrossValidationModel crossValidationModel, ExperimentModelExecutionContext mainContext)
        {
            report.data.Add(nameof(ReportDataFieldEnum.DataSetName), mainContext.dataset.name, "Name of dataset used in the experiment");
            report.data.Add(nameof(ReportDataFieldEnum.ValidationK), crossValidationModel.GetShortSignature(), "Cross validation model signature");

            // Sum test-set sizes (Targets) across all folds, counting folds as we go.
            double targetTotal = 0;
            Int32  foldCount   = 0;

            foreach (var foldReport in mainContext.testSummaries)
            {
                targetTotal += foldReport.Targets;
                foldCount++;
            }

            // Average test instances per fold; training per fold is the remainder of the total.
            double testPerFold     = targetTotal.GetRatio(foldCount);
            double trainingPerFold = targetTotal - testPerFold;

            report.data.Add(nameof(ReportDataFieldEnum.TestSetCount), testPerFold.ToString("F2"), "Average number of test instances per fold");
            report.data.Add(nameof(ReportDataFieldEnum.TrainingSetCount), trainingPerFold.ToString("F2"), "Average number of training instances per fold");
        }
        /// <summary>
        /// Adds term and feature counts from the operation context to the report. Before feature
        /// selection only input term counts are recorded; afterwards the selected-term counts,
        /// the selected-feature count and the smallest selected-feature weight are added.
        /// </summary>
        /// <param name="report">Report to be extended.</param>
        /// <param name="context">Operation context providing the space model and selected features.</param>
        /// <param name="afterFeatureSelection">True when called after feature selection was applied.</param>
        public static void SetReportDataFields(this classificationReport report, OperationContext context, Boolean afterFeatureSelection = false)
        {
            Int32 labeledCount   = context.spaceModel.terms_known_label.Count;
            Int32 unlabeledCount = context.spaceModel.terms_unknown_label.Count;

            if (afterFeatureSelection)
            {
                report.data.Add(nameof(ReportDataFieldEnum.labeled_selected_terms), labeledCount.ToString(), "Number of labeled selected terms");
                report.data.Add(nameof(ReportDataFieldEnum.unlabeled_selected_terms), unlabeledCount.ToString(), "Number of unlabeled selected terms");

                report.data.Add(nameof(ReportDataFieldEnum.SelectedFeatures), context.SelectedFeatures.Count.ToString(), "Number of selected features");

                // Scan all selected-feature weights to determine the minimum.
                rangeFinder weightRange = new rangeFinder();

                foreach (var entry in context.SelectedFeatures.index)
                {
                    weightRange.Learn(entry.Value.weight);
                }

                report.data.Add(nameof(ReportDataFieldEnum.SelectedFeatureMin), weightRange.Minimum.ToString("F5"), "Smallest weight of a selected feature");
            }
            else
            {
                report.data.Add(nameof(ReportDataFieldEnum.labeled_terms), labeledCount.ToString(), "Number of labeled input terms");
                report.data.Add(nameof(ReportDataFieldEnum.unlabeled_terms), unlabeledCount.ToString(), "Number of unlabeled input terms");
            }
        }
 /// <summary>
 /// Adds dataset structure data fields (pages per site) to the report.
 /// </summary>
 /// <param name="report">Report to be extended.</param>
 /// <param name="context">Dataset structure report to read the figures from.</param>
 public static void SetReportDataFields(this classificationReport report, DatasetStructureReport context)
 {
     String pagesPerSite = context.pagesPerSite.ToString("F2");

     report.data.Add(nameof(ReportDataFieldEnum.PagePerSite), pagesPerSite, "Pages per web site instance");
 }
// Example #7 (score: 0)
 /// <summary>
 /// Adds model signatures (classifier, feature selection filter, feature weighting) to the report.
 /// </summary>
 /// <param name="report">Report to be extended.</param>
 /// <param name="classifier">Classification algorithm whose signature is recorded.</param>
 /// <param name="filter">Feature selection filter whose signature is recorded.</param>
 /// <param name="featureWeight">Feature weight model whose signature is recorded.</param>
 public static void SetReportDataFields(this classificationReport report, IClassifier classifier, FeatureFilter filter, FeatureWeightModel featureWeight)
 {
     String classifierSignature = classifier.GetSignature();
     String filterSignature     = filter.GetSignature();
     String weightSignature     = featureWeight.GetSignature();

     report.data.Add(nameof(ReportDataFieldEnum.Classifier), classifierSignature, "Signature of the classification algorithm");
     report.data.Add(nameof(ReportDataFieldEnum.FeatureSelection), filterSignature, "Signature of feature selection filter model");
     report.data.Add(nameof(ReportDataFieldEnum.FeatureWeighting), weightSignature, "Signature of feature weight model");
 }
// Example #8 (score: 0)
 /// <summary>
 /// Adds the number of selected features (taken from the weight dictionary) to the report.
 /// </summary>
 /// <param name="report">Report to be extended.</param>
 /// <param name="selected">Weight dictionary with the selected features.</param>
 public static void SetReportDataFields(this classificationReport report, WeightDictionary selected)
 {
     String selectedCount = selected.Count.ToString();

     report.data.Add(nameof(ReportDataFieldEnum.SelectedFeatures), selectedCount, "Number of selected features");
 }
        /// <summary>
        /// Generates the final reports and read me files: builds the summary table with an
        /// averaged row, serializes the final report, saves the notes and the log printout.
        /// </summary>
        /// <param name="logger">The logger - to log progress and read back the run's log content.</param>
        /// <param name="startOfLog">Position in the log content where this experiment's output began.</param>
        public void CloseExperiment(ILogBuilder logger, long startOfLog)
        {
            // Early exit also guards the DivideValues(testSummaries.Count) below against zero.
            if (!testSummaries.Any())
            {
                logger.log("No experiment procedures performed"); // fixed typo: "performes"

                return;
            }
            DataTableTypeExtended <classificationReport> summaryTable = new DataTableTypeExtended <classificationReport>("Test results", "k-fold cross validation results"); // fixed typo: "valudation"

            // Aggregate row: sums all fold reports, then divides to obtain the average.
            classificationReport sumRow = new classificationReport(runName);

            sumRow.Comment = runName + ", " + description;

            foreach (classificationReport s in testSummaries)
            {
                summaryTable.AddRow(s);

                // The first non-empty classifier name found is used for the summary row.
                if (sumRow.Classifier.isNullOrEmpty())
                {
                    sumRow.Classifier = s.Classifier;
                }

                sumRow.AddValues(s);
            }

            sumRow.DivideValues(testSummaries.Count);

            sumRow.SetReportDataFields(crossValidation, this);

            summaryTable.SetDescription(description);

            summaryTable.SetAdditionalInfoEntry("RunName", runName);
            summaryTable.SetAdditionalInfoEntry("Description", description);
            summaryTable.SetAdditionalInfoEntry("Averaging", averagingMethod.ToString());

            summaryTable.AddRow(sumRow);

            summaryTable.GetReportAndSave(notes.folder, signature);

            finalReport = sumRow;

            sumRow.ReportToLog(notes);

            objectSerialization.saveObjectToXML(sumRow, notes.folder.pathFor("results.xml", imbSCI.Data.enums.getWritableFileMode.overwrite, "Experiment results", true));

            logger.log("Experiment completed");

            notes.SaveNote("note");

            // Dump the portion of the log produced during this experiment run to log.txt.
            String logPrintout = logger.GetContent(startOfLog);
            String p           = notes.folder.pathFor("log.txt", imbSCI.Data.enums.getWritableFileMode.overwrite, "Log printout during experiment execution");

            File.WriteAllText(p, logPrintout);

            experimentRootFolder.generateReadmeFiles(signature);
        }
        /// <summary>
        /// Executes the operation pipeline for one dataset fold: optional precompiled document
        /// selection, dataset deployment, feature selection, vector construction, classification,
        /// evaluation and report generation.
        /// </summary>
        /// <param name="logger">The logger - to log progress and problems.</param>
        /// <param name="executionContextMain">Main operation context for this fold.</param>
        /// <param name="executionContextExtra">Experiment-level context (truth table, summaries, resources).</param>
        /// <returns>Pair of the fold and its (updated) operation context.</returns>
        /// <exception cref="ArgumentException">Thrown when the precompiled document-selection file cannot be loaded.</exception>
        public override ExperimentDataSetFoldContextPair <OperationContext> Execute(ILogBuilder logger, OperationContext executionContextMain = null, ExperimentModelExecutionContext executionContextExtra = null)
        {
            ExperimentDataSetFoldContextPair <OperationContext> output = new ExperimentDataSetFoldContextPair <OperationContext>(fold, executionContextMain);

            Open();

            // --- optional: restrict the fold's dataset using precompiled document-select scores
            if (!setup.documentSelectQuery.PrecompiledScoresFilename.Trim().isNullOrEmpty())
            {
                String precompFile = DocumentSelectResult.CheckAndMakeFilename(setup.documentSelectQuery.PrecompiledScoresFilename);

                var p = executionContextExtra.resourceProvider.GetResourceFile(precompFile, fold);

                DocumentSelectResult scores = DocumentSelectResult.LoadFromFile(p, logger);

                if (scores != null)
                {
                    scores.SaveReport(fold_notes.folder.pathFor("DSScores_loaded.txt", imbSCI.Data.enums.getWritableFileMode.overwrite));

                    scores = setup.documentSelectQuery.ExecuteLimit(scores, logger);

                    IEnumerable <string> assignedIDs = scores.items.Select(x => x.AssignedID);

                    scores.SaveReport(fold_notes.folder.pathFor("DSScores_applied.txt", imbSCI.Data.enums.getWritableFileMode.overwrite));

                    fold.DataSetSubSet(assignedIDs.ToList(), true, true);
                }
                else
                {
                    // FIX: log before throwing — in the original the log call was placed after
                    // the throw statement and was unreachable.
                    logger.log(" _ DocumentSelect failed for [" + name + "]");

                    throw new ArgumentException("DSelection file failed: " + setup.documentSelectQuery.PrecompiledScoresFilename);
                }
            }

            // Temporary report collecting dataset-structure and pre-selection fields; merged
            // into the averaged report at the end.
            classificationReport tmpReport = new classificationReport();

            String dsReportName = fold.name + setup.documentSelectQuery.PrecompiledScoresFilename + setup.documentSelectQuery.SizeLimit;

            DatasetStructureReport dsReport = DatasetStructureReport.MakeStructureReport(fold, dsReportName);

            dsReport.Publish(fold_notes.folder, true, true);

            tmpReport.SetReportDataFields(dsReport);

            // --- dataset deployment and space model population (only once per context)
            if (!output.context.IsDatasetDeployed)
            {
                output.context.DeployDataSet(fold, logger);

                entityOperation.TextRendering(output.context, notes, requirements.MayUseTextRender);

                corpusOperation.SpaceModelPopulation(output.context, notes);

                if (requirements.MayUseSpaceModelCategories)
                {
                    corpusOperation.SpaceModelCategories(output.context, notes);
                }
            }

            tmpReport.SetReportDataFields(output.context, false);

            // --- feature selection and vector space construction
            corpusOperation.FeatureSelection(output.context, notes);

            corpusOperation.VectorSpaceConstruction(output.context, notes, requirements.MayUseVectorSpaceCategories);

            corpusOperation.FeatureVectorConstruction(output.context, notes);

            if (setup.reportOptions.HasFlag(OperationReportEnum.randomSampledDemo))
            {
                logger.log("-- generating random sample report");
                var data_wm = imbNLP.Toolkit.Reporting.ReportGenerators.MakeWeightModelDemoTable(output.context.spaceModel, corpusOperation.weightModel, output.context.SelectedFeatures, 5, "DemoForWeightModel", "Diagnostic report for picked sample");
                data_wm.GetReportAndSave(fold_notes.folder);
                var data_fs = imbNLP.Toolkit.Reporting.ReportGenerators.MakeWeightModelDemoTable(output.context.spaceModel, corpusOperation.filter.WeightModel, output.context.SelectedFeatures, 5, "DemoForFeatureSelection", "Diagnostic report for feature selection filter sample");
                data_fs.GetReportAndSave(fold_notes.folder);
            }

            // --- classification and evaluation
            classificationOperation.PerformClassification(output.context, executionContextExtra.truthTable, setup.dataSetMode, notes);

            corpusOperation.weightModel.DiagnosticDump(fold_notes.folder, logger);

            classificationEvalMetricSet evaluationMetrics = executionContextExtra.truthTable.EvaluateTestResultsToMetricSet(output.context.testResults, setup.OutputFilename + "-" + notes.folder.name, logger);

            if (setup.ExportEvaluationAsDocumentSelectionResult)
            {
                // Export per-document evaluation scores so they can feed a later document-selection run.
                Toolkit.Feature.FeatureVectorDictionaryWithDimensions dict = executionContextExtra.truthTable.GetEvaluationAsFeatureVectorDictionary(output.context.testResults, setup.OutputFilename, logger, setup.ExportEvaluationCorrectScore, setup.ExportEvaluationIncorrectScore);
                String out_ds = setup.ExportEvaluationToFilename.Replace("*", "");
                dict.Save(fold_notes.folder, out_ds.or(setup.OutputFilename), logger);
                dict.Save(notes.folder, out_ds.or(setup.OutputFilename), logger);
            }

            // Per-class evaluation table plus a summary row.
            DataTableTypeExtended <classificationEval> inclassEvalTable = new DataTableTypeExtended <classificationEval>("inclass_evaluation", "Test results, per class");

            evaluationMetrics.GetAllEntries().ForEach(x => inclassEvalTable.AddRow(x));
            inclassEvalTable.AddRow(evaluationMetrics.GetSummary("Sum"));
            notes.SaveDataTable(inclassEvalTable, notes.folder_classification);

            // Averaged report: serialized, logged, enriched with data fields and added to the summaries.
            classificationReport averagedReport = new classificationReport(evaluationMetrics, setup.averagingMethod);

            averagedReport.Classifier = classificationOperation.classifier.GetSignature();
            averagedReport.saveObjectToXML(notes.folder_classification.pathFor(averagedReport.Name + ".xml", imbSCI.Data.enums.getWritableFileMode.overwrite, "Serialized classification evaluation results summary"));
            averagedReport.ReportToLog(notes);

            averagedReport.SetReportDataFields(output.context, true);
            averagedReport.data.Merge(tmpReport.data);

            averagedReport.SetReportDataFields(classificationOperation.classifier, corpusOperation.filter, corpusOperation.weightModel);

            executionContextExtra.testSummaries.Add(averagedReport);

            // --- final report generation for this fold
            OperationContextReport reportOperation = new OperationContextReport();

            reportOperation.DeploySettingsBase(notes);

            reportOperation.GenerateReports(output.context, setup.reportOptions, notes);

            Close();

            return output;
        }