/// <summary>
/// Runs the classification operation for the current <c>fold</c>: optionally narrows the fold using a
/// precompiled document-selection score file, prepares the operation context (data set deployment,
/// text rendering, space model, feature selection, vector construction), performs the classification,
/// evaluates the test results against <c>executionContextExtra.truthTable</c> and writes the reports.
/// </summary>
/// <param name="logger">Log builder receiving progress and diagnostic messages.</param>
/// <param name="executionContextMain">Operation context paired with the fold in the returned object.</param>
/// <param name="executionContextExtra">
/// Experiment-level context; its <c>resourceProvider</c>, <c>truthTable</c> and <c>testSummaries</c> are used here,
/// so it must not be <c>null</c> even though the signature allows it.
/// </param>
/// <returns>The fold/context pair created from <c>fold</c> and <paramref name="executionContextMain"/>.</returns>
/// <exception cref="ArgumentNullException"><paramref name="executionContextExtra"/> is <c>null</c>.</exception>
/// <exception cref="ArgumentException">
/// A precompiled scores filename is configured but <c>DocumentSelectResult.LoadFromFile</c> returned <c>null</c>.
/// </exception>
public override ExperimentDataSetFoldContextPair<OperationContext> Execute(ILogBuilder logger, OperationContext executionContextMain = null, ExperimentModelExecutionContext executionContextExtra = null)
{
    // The truth table and the test summary list are read from this context on every path,
    // so fail fast instead of running the whole pipeline and hitting a NullReferenceException later.
    if (executionContextExtra == null)
    {
        throw new ArgumentNullException(nameof(executionContextExtra));
    }

    ExperimentDataSetFoldContextPair<OperationContext> output = new ExperimentDataSetFoldContextPair<OperationContext>(fold, executionContextMain);

    Open();

    // --- Optional document selection from a precompiled score file ---
    if (!setup.documentSelectQuery.PrecompiledScoresFilename.Trim().isNullOrEmpty())
    {
        String precompFile = DocumentSelectResult.CheckAndMakeFilename(setup.documentSelectQuery.PrecompiledScoresFilename);

        var p = executionContextExtra.resourceProvider.GetResourceFile(precompFile, fold);

        DocumentSelectResult scores = DocumentSelectResult.LoadFromFile(p, logger);

        if (scores == null)
        {
            // Log first: the original placed this call after the throw, where it could never run.
            logger.log(" _ DocumentSelect failed for [" + name + "]");
            throw new ArgumentException("DSelection file failed: " + setup.documentSelectQuery.PrecompiledScoresFilename);
        }

        scores.SaveReport(fold_notes.folder.pathFor("DSScores_loaded.txt", imbSCI.Data.enums.getWritableFileMode.overwrite));

        scores = setup.documentSelectQuery.ExecuteLimit(scores, logger);

        // Kept lazy on purpose: the IDs are enumerated only after the second report is saved,
        // exactly as before.
        IEnumerable<string> assignedIDs = scores.items.Select(x => x.AssignedID);

        scores.SaveReport(fold_notes.folder.pathFor("DSScores_applied.txt", imbSCI.Data.enums.getWritableFileMode.overwrite));

        fold.DataSetSubSet(assignedIDs.ToList(), true, true);
    }

    // --- Data set structure report (data fields are merged into the final report below) ---
    classificationReport tmpReport = new classificationReport();

    String dsReportName = fold.name + setup.documentSelectQuery.PrecompiledScoresFilename + setup.documentSelectQuery.SizeLimit;
    DatasetStructureReport dsReport = DatasetStructureReport.MakeStructureReport(fold, dsReportName);
    dsReport.Publish(fold_notes.folder, true, true);

    tmpReport.SetReportDataFields(dsReport);

    // --- Context preparation, skipped when the context already has a deployed data set ---
    if (!output.context.IsDatasetDeployed)
    {
        output.context.DeployDataSet(fold, logger);

        entityOperation.TextRendering(output.context, notes, requirements.MayUseTextRender);

        corpusOperation.SpaceModelPopulation(output.context, notes);

        if (requirements.MayUseSpaceModelCategories)
        {
            corpusOperation.SpaceModelCategories(output.context, notes);
        }
    }

    tmpReport.SetReportDataFields(output.context, false);

    corpusOperation.FeatureSelection(output.context, notes);

    corpusOperation.VectorSpaceConstruction(output.context, notes, requirements.MayUseVectorSpaceCategories);

    corpusOperation.FeatureVectorConstruction(output.context, notes);

    // --- Optional diagnostic tables for the weight model and the feature selection filter ---
    if (setup.reportOptions.HasFlag(OperationReportEnum.randomSampledDemo))
    {
        logger.log("-- generating random sample report");

        var data_wm = imbNLP.Toolkit.Reporting.ReportGenerators.MakeWeightModelDemoTable(output.context.spaceModel, corpusOperation.weightModel, output.context.SelectedFeatures, 5, "DemoForWeightModel", "Diagnostic report for picked sample");
        data_wm.GetReportAndSave(fold_notes.folder);

        var data_fs = imbNLP.Toolkit.Reporting.ReportGenerators.MakeWeightModelDemoTable(output.context.spaceModel, corpusOperation.filter.WeightModel, output.context.SelectedFeatures, 5, "DemoForFeatureSelection", "Diagnostic report for feature selection filter sample");
        data_fs.GetReportAndSave(fold_notes.folder);
    }

    // --- Classification and evaluation ---
    classificationOperation.PerformClassification(output.context, executionContextExtra.truthTable, setup.dataSetMode, notes);

    corpusOperation.weightModel.DiagnosticDump(fold_notes.folder, logger);

    classificationEvalMetricSet evaluationMetrics = executionContextExtra.truthTable.EvaluateTestResultsToMetricSet(output.context.testResults, setup.OutputFilename + "-" + notes.folder.name, logger);

    if (setup.ExportEvaluationAsDocumentSelectionResult)
    {
        Toolkit.Feature.FeatureVectorDictionaryWithDimensions dict = executionContextExtra.truthTable.GetEvaluationAsFeatureVectorDictionary(output.context.testResults, setup.OutputFilename, logger, setup.ExportEvaluationCorrectScore, setup.ExportEvaluationIncorrectScore);

        String out_ds = setup.ExportEvaluationToFilename.Replace("*", "");

        // The dictionary is written to both the fold notes folder and the operation notes folder.
        dict.Save(fold_notes.folder, out_ds.or(setup.OutputFilename), logger);
        dict.Save(notes.folder, out_ds.or(setup.OutputFilename), logger);
    }

    // --- Per-class evaluation table, closed by a "Sum" summary row ---
    DataTableTypeExtended<classificationEval> inclassEvalTable = new DataTableTypeExtended<classificationEval>("inclass_evaluation", "Test results, per class");
    evaluationMetrics.GetAllEntries().ForEach(x => inclassEvalTable.AddRow(x));
    inclassEvalTable.AddRow(evaluationMetrics.GetSummary("Sum"));

    notes.SaveDataTable(inclassEvalTable, notes.folder_classification);

    // --- Averaged report: serialized, logged, enriched and registered with the experiment context ---
    classificationReport averagedReport = new classificationReport(evaluationMetrics, setup.averagingMethod);
    averagedReport.Classifier = classificationOperation.classifier.GetSignature();
    averagedReport.saveObjectToXML(notes.folder_classification.pathFor(averagedReport.Name + ".xml", imbSCI.Data.enums.getWritableFileMode.overwrite, "Serialized classification evaluation results summary"));
    averagedReport.ReportToLog(notes);

    averagedReport.SetReportDataFields(output.context, true);
    averagedReport.data.Merge(tmpReport.data);

    averagedReport.SetReportDataFields(classificationOperation.classifier, corpusOperation.filter, corpusOperation.weightModel);

    executionContextExtra.testSummaries.Add(averagedReport);

    OperationContextReport reportOperation = new OperationContextReport();
    reportOperation.DeploySettingsBase(notes);

    reportOperation.GenerateReports(output.context, setup.reportOptions, notes);

    // NOTE: a per-category report (test results grouped by true category, plus an "AVG-" entry)
    // was previously generated at this point; that code was commented out and is not executed.

    Close();

    return output;
}
/// <summary>
/// Builds and saves the feature weight model artifacts for the current <c>fold</c>: the selected
/// features, the element factors returned by the weight model and the weight model itself.
/// When <c>setup.skipIfExisting</c> is set and the model definition, model data and weight
/// dictionary files all exist already, the computation is skipped.
/// </summary>
/// <param name="logger">Log builder receiving progress messages.</param>
/// <param name="executionContextMain">Operation context paired with the fold in the returned object.</param>
/// <param name="executionContextExtra">Experiment-level context; not used by this operation.</param>
/// <returns>The fold/context pair created from <c>fold</c> and <paramref name="executionContextMain"/>.</returns>
public override ExperimentDataSetFoldContextPair<OperationContext> Execute(ILogBuilder logger, OperationContext executionContextMain = null, ExperimentModelExecutionContext executionContextExtra = null)
{
    var output = new ExperimentDataSetFoldContextPair<OperationContext>(fold, executionContextMain);

    Open();

    // Expected locations of the three artifacts this operation produces.
    String modelDefinitionPath = FeatureWeightModel.GetModelDefinitionFilename(setup.OutputFilename, fold_notes.folder);
    String modelDataPath = FeatureWeightModel.GetModelDataFilename(setup.OutputFilename, fold_notes.folder);
    String weightTablePath = WeightDictionary.GetDictionaryFilename(setup.OutputFilename, fold_notes.folder);

    // Short-circuit order matches the nested checks: the file system is touched only when skipping is enabled.
    Boolean alreadyBuilt = setup.skipIfExisting
        && File.Exists(modelDefinitionPath)
        && File.Exists(modelDataPath)
        && File.Exists(weightTablePath);

    if (alreadyBuilt)
    {
        logger.log("WeightTable [" + modelDataPath + "] found, skipping the operation");
    }
    else
    {
        output.context.DeployDataSet(fold, logger);

        // Only text rendering is performed at entity level; the TextPreblendFilter and
        // TextBlending calls are disabled in this pipeline.
        entityOperation.TextRendering(output.context, notes);

        corpusOperation.SpaceModelPopulation(output.context, notes);
        corpusOperation.SpaceModelCategories(output.context, notes);
        corpusOperation.FeatureSelection(output.context, notes, requirements.MayUseSelectedFeatures);

        // Selected features are stored under the output filename with the "_fs" suffix.
        output.context.SelectedFeatures.Save(fold_notes.folder, notes, setup.OutputFilename + "_fs");

        corpusOperation.weightModel.PrepareTheModel(output.context.spaceModel, logger);

        var elementFactors = corpusOperation.weightModel.GetElementFactors(output.context.SelectedFeatures.GetKeys(), output.context.spaceModel);
        elementFactors.Save(fold_notes.folder, notes, setup.OutputFilename);

        corpusOperation.weightModel.Save(setup.OutputFilename, fold_notes.folder, notes);

        var contextReport = new OperationContextReport();
        contextReport.DeploySettingsBase(notes);
        contextReport.GenerateReports(output.context, setup.reportOptions, notes);
    }

    Close();

    return output;
}
/// <summary>
/// Computes the document selection ranking for the current <c>fold</c> and writes it, serialized as XML,
/// to the resource file path obtained from <c>executionContextExtra.resourceProvider</c>.
/// When <c>setup.skipIfExisting</c> is set, the previously saved ranking is checked with
/// <c>DocumentRankingExtensions.EvaluateSavedDSRanking</c> and the computation is skipped if that check returns <c>true</c>.
/// </summary>
/// <param name="logger">Log builder receiving progress messages.</param>
/// <param name="executionContextMain">Operation context paired with the fold in the returned object.</param>
/// <param name="executionContextExtra">
/// Experiment-level context; its <c>resourceProvider</c> resolves the ranking file location on every path,
/// so it must not be <c>null</c> even though the signature allows it.
/// </param>
/// <returns>The fold/context pair created from <c>fold</c> and <paramref name="executionContextMain"/>.</returns>
/// <exception cref="ArgumentNullException"><paramref name="executionContextExtra"/> is <c>null</c>.</exception>
public override ExperimentDataSetFoldContextPair<OperationContext> Execute(ILogBuilder logger, OperationContext executionContextMain = null, ExperimentModelExecutionContext executionContextExtra = null)
{
    // The resource provider is needed both for the skip check and for saving the result; without this
    // guard a null context would only surface as a NullReferenceException after the whole computation.
    if (executionContextExtra == null)
    {
        throw new ArgumentNullException(nameof(executionContextExtra));
    }

    ExperimentDataSetFoldContextPair<OperationContext> output = new ExperimentDataSetFoldContextPair<OperationContext>(fold, executionContextMain);

    Open();

    Boolean skip = false;

    String fn = setup.OutputFilename;

    if (setup.skipIfExisting)
    {
        String f_n = DocumentSelectResult.CheckAndMakeFilename(fn);
        f_n = executionContextExtra.resourceProvider.GetResourceFile(f_n, fold);

        skip = DocumentRankingExtensions.EvaluateSavedDSRanking(f_n, logger, 0.01);
    }

    if (!skip)
    {
        output.context.DeployDataSet(fold, logger);

        // The steps below run unconditionally: the IsTextRendered / IsModelReady checks that
        // once wrapped them are disabled.
        entityOperation.TextRendering(output.context, notes);

        corpusOperation.SpaceModelPopulation(output.context, notes);

        corpusOperation.SpaceModelCategories(output.context, notes);

        corpusOperation.FeatureSelection(output.context, notes, requirements.MayUseSelectedFeatures);

        // NOTE(review): the flag passed here is MayUseSpaceModelCategories - confirm this is the intended
        // requirement for vector space construction.
        corpusOperation.VectorSpaceConstruction(output.context, notes, requirements.MayUseSpaceModelCategories);

        logger.log("Document selection computation");

        DocumentSelectResult drmContext = output.context.PrepareContext(ranking, fold_notes.folder, logger);
        drmContext = ranking.ExecuteEvaluation(drmContext, logger);

        // Each appendix entry is added to the description on its own line.
        foreach (String l in setup.descriptionAppendix)
        {
            drmContext.description += Environment.NewLine + l;
        }

        fn = DocumentSelectResult.CheckAndMakeFilename(fn);
        fn = executionContextExtra.resourceProvider.SetResourceFilePath(fn, fold);

        String xmlModel = objectSerialization.ObjectToXML(drmContext);
        File.WriteAllText(fn, xmlModel);

        OperationContextReport reportOperation = new OperationContextReport();
        reportOperation.DeploySettingsBase(notes);

        reportOperation.GenerateReports(output.context, setup.reportOptions, notes);
    }

    Close();

    return output;
}