Ejemplo n.º 1
0
        /// <summary>
        /// Combines two or more precompiled document selection ranks
        /// </summary>
        /// <param name="inputNames">comma separated list of DS rank file names, leave empty if search pattern is used</param>
        /// <param name="searchPattern">file search pattern to select source files, leave * if no file search should be performed</param>
        /// <param name="compression">vector dimensions compression operation, i.e. how scores should be combined into single dimension</param>
        /// <param name="outputName">Name of the output.</param>
        /// <param name="doRankingFusion">if set to <c>true</c> [do ranking fusion].</param>
        /// <remarks>
        /// What it will do?
        /// </remarks>
        /// <seealso cref="aceOperationSetExecutorBase" />
        public void aceOperation_makeCombineDSRanks(
            [Description("Space separated list of DS rank file names, leave empty if search pattern is used")] String inputNames = " ",
            [Description("vector dimensions compression operation, i.e. how scores should be combined into single dimension")] operation compression = operation.avg,
            [Description("Name of output Document Selection Rank file. Leave * to assign name as combination of input files")] String outputName     = "*",
            [Description("If true, it will perform ranking fusion instead of simple score fusion")] Boolean doRankingFusion = true,
            [Description("file search pattern to select source files, leave * if no file search should be performed")] String searchPattern = "*"
            )
        {
            SetupDocumentSelection setup = docSelection.data.CloneViaXML();

            ProceduralFolderFor <ProcedureCreateScoreSet, SetupDocumentSelection, OperationContext, ExperimentModelExecutionContext> procedures
                = new ProceduralFolderFor <ProcedureCreateScoreSet, SetupDocumentSelection, OperationContext, ExperimentModelExecutionContext>(mainContext.folds, setup, mainContext.notes, parent);

            outputName = DocumentSelectResult.CheckAndMakeFilename(outputName);

            foreach (var p in procedures)
            {
                p.Open();


                DocumentSelectResult resultOut = new DocumentSelectResult();

                var fl = mainContext.resourceProvider.GetResourceFiles(inputNames, p.fold);

                List <DocumentSelectResult> results = DocumentRankingExtensions.LoadDSRankings(fl, p.notes);

                resultOut = results.Fusion(compression, doRankingFusion, true, p.notes);

                String pt = mainContext.resourceProvider.SetResourceFilePath(outputName, p.fold);

                resultOut.saveObjectToXML(pt);

                p.Close();
            }
        }
        public override ExperimentDataSetFoldContextPair <OperationContext> Execute(ILogBuilder logger, OperationContext executionContextMain = null, ExperimentModelExecutionContext executionContextExtra = null)
        {
            ExperimentDataSetFoldContextPair <OperationContext> output = new ExperimentDataSetFoldContextPair <OperationContext>(fold, executionContextMain);

            Open();


            if (!setup.documentSelectQuery.PrecompiledScoresFilename.Trim().isNullOrEmpty())
            {
                String precompFile = DocumentSelectResult.CheckAndMakeFilename(setup.documentSelectQuery.PrecompiledScoresFilename);

                var p = executionContextExtra.resourceProvider.GetResourceFile(precompFile, fold);

                //var p = executionContextExtra.resourceProvider.folder.findFile(precompFile, SearchOption.AllDirectories);

                DocumentSelectResult scores = DocumentSelectResult.LoadFromFile(p, logger);  // objectSerialization.loadObjectFromXML<DocumentSelectResult>(path, logger);

                if (scores != null)
                {
                    scores.SaveReport(fold_notes.folder.pathFor("DSScores_loaded.txt", imbSCI.Data.enums.getWritableFileMode.overwrite));

                    scores = setup.documentSelectQuery.ExecuteLimit(scores, logger);

                    IEnumerable <string> assignedIDs = scores.items.Select(x => x.AssignedID);

                    scores.SaveReport(fold_notes.folder.pathFor("DSScores_applied.txt", imbSCI.Data.enums.getWritableFileMode.overwrite));

                    fold.DataSetSubSet(assignedIDs.ToList(), true, true);
                }
                else
                {
                    throw new ArgumentException("DSelection file failed: " + setup.documentSelectQuery.PrecompiledScoresFilename);

                    logger.log(" _ DocumentSelect failed for [" + name + "]");
                }
            }

            classificationReport tmpReport = new classificationReport();

            String dsReportName = fold.name + setup.documentSelectQuery.PrecompiledScoresFilename + setup.documentSelectQuery.SizeLimit;


            DatasetStructureReport dsReport = DatasetStructureReport.MakeStructureReport(fold, dsReportName);

            dsReport.Publish(fold_notes.folder, true, true);

            tmpReport.SetReportDataFields(dsReport);

            if (!output.context.IsDatasetDeployed)
            {
                output.context.DeployDataSet(fold, logger);

                entityOperation.TextRendering(output.context, notes, requirements.MayUseTextRender);


                corpusOperation.SpaceModelPopulation(output.context, notes);

                if (requirements.MayUseSpaceModelCategories)
                {
                    corpusOperation.SpaceModelCategories(output.context, notes);
                }
            }

            tmpReport.SetReportDataFields(output.context, false);

            corpusOperation.FeatureSelection(output.context, notes);


            corpusOperation.VectorSpaceConstruction(output.context, notes, requirements.MayUseVectorSpaceCategories);

            corpusOperation.FeatureVectorConstruction(output.context, notes);


            if (setup.reportOptions.HasFlag(OperationReportEnum.randomSampledDemo))
            {
                logger.log("-- generating random sample report");
                var data_wm = imbNLP.Toolkit.Reporting.ReportGenerators.MakeWeightModelDemoTable(output.context.spaceModel, corpusOperation.weightModel, output.context.SelectedFeatures, 5, "DemoForWeightModel", "Diagnostic report for picked sample");
                data_wm.GetReportAndSave(fold_notes.folder);
                var data_fs = imbNLP.Toolkit.Reporting.ReportGenerators.MakeWeightModelDemoTable(output.context.spaceModel, corpusOperation.filter.WeightModel, output.context.SelectedFeatures, 5, "DemoForFeatureSelection", "Diagnostic report for feature selection filter sample");
                data_fs.GetReportAndSave(fold_notes.folder);
            }

            classificationOperation.PerformClassification(output.context, executionContextExtra.truthTable, setup.dataSetMode, notes);


            corpusOperation.weightModel.DiagnosticDump(fold_notes.folder, logger);

            //classificationOperation.classifier.

            classificationEvalMetricSet evaluationMetrics = executionContextExtra.truthTable.EvaluateTestResultsToMetricSet(output.context.testResults, setup.OutputFilename + "-" + notes.folder.name, logger);

            if (setup.ExportEvaluationAsDocumentSelectionResult)
            {
                Toolkit.Feature.FeatureVectorDictionaryWithDimensions dict = executionContextExtra.truthTable.GetEvaluationAsFeatureVectorDictionary(output.context.testResults, setup.OutputFilename, logger, setup.ExportEvaluationCorrectScore, setup.ExportEvaluationIncorrectScore);
                String out_ds = setup.ExportEvaluationToFilename.Replace("*", "");
                dict.Save(fold_notes.folder, out_ds.or(setup.OutputFilename), logger);
                //executionContextExtra.resourceProvider.folder
                dict.Save(notes.folder, out_ds.or(setup.OutputFilename), logger);
            }


            DataTableTypeExtended <classificationEval> inclassEvalTable = new DataTableTypeExtended <classificationEval>("inclass_evaluation", "Test results, per class");

            evaluationMetrics.GetAllEntries().ForEach(x => inclassEvalTable.AddRow(x));
            inclassEvalTable.AddRow(evaluationMetrics.GetSummary("Sum"));
            notes.SaveDataTable(inclassEvalTable, notes.folder_classification);

            classificationReport averagedReport = new classificationReport(evaluationMetrics, setup.averagingMethod);

            averagedReport.Classifier = classificationOperation.classifier.GetSignature(); // featureMethod.classifierSettings.name; // FeatureMethod.classifier.name;
            averagedReport.saveObjectToXML(notes.folder_classification.pathFor(averagedReport.Name + ".xml", imbSCI.Data.enums.getWritableFileMode.overwrite, "Serialized classification evaluation results summary"));
            averagedReport.ReportToLog(notes);

            averagedReport.SetReportDataFields(output.context, true);
            averagedReport.data.Merge(tmpReport.data);

            averagedReport.SetReportDataFields(classificationOperation.classifier, corpusOperation.filter, corpusOperation.weightModel);



            executionContextExtra.testSummaries.Add(averagedReport);



            OperationContextReport reportOperation = new OperationContextReport();

            reportOperation.DeploySettingsBase(notes);

            reportOperation.GenerateReports(output.context, setup.reportOptions, notes);

            /*
             * if (setup.reportOptions.HasFlag(OperationReportEnum.reportClassification))
             * {
             *
             *  Dictionary<string, List<FeatureVectorWithLabelID>> byCategory = executionContextExtra.truthTable.GroupByTrueCategory(executionContextMain.testResults);
             *
             *  objectTable<classificationReport> tbl = new objectTable<classificationReport>(nameof(classificationReport.Name), "inclass_" + executionContextExtra.runName);
             *  classificationReport macroAverage = new classificationReport("AVG-" + executionContextExtra.runName);
             *  foreach (KeyValuePair<string, List<FeatureVectorWithLabelID>> pair in byCategory)
             *  {
             *      var cReport = executionContextExtra.EvaluateTestResults(pair.Value, pair.Key + "-" + executionContextExtra.runName, logger);
             *
             *      cReport.Classifier = classificationOperation.classifier.GetSignature(); // classifier.name;
             *      cReport.Comment = "Tr/Ts [" + executionContextMain.trainingSet.Count + "]:[" + executionContextMain.testSet.Count + "]";
             *      String path = notes.folder_classification.pathFor(pair.Key + "_result.xml", imbSCI.Data.enums.getWritableFileMode.overwrite, "Serialized evaluation result within category [" + pair.Key + "]", true);
             *
             *      macroAverage.AddValues(cReport);
             *
             *      tbl.Add(cReport);
             *  }
             *  //  macroAverage.DivideValues(byCategory.Keys.Count);
             *
             *  tbl.Add(macroAverage);
             *
             *  notes.SaveDataTable(tbl.GetDataTable(), notes.folder_classification);
             *
             * }*/

            Close();

            return(output);
        }
Ejemplo n.º 3
0
        public override ExperimentDataSetFoldContextPair <OperationContext> Execute(ILogBuilder logger, OperationContext executionContextMain = null, ExperimentModelExecutionContext executionContextExtra = null)
        {
            ExperimentDataSetFoldContextPair <OperationContext> output = new ExperimentDataSetFoldContextPair <OperationContext>(fold, executionContextMain);

            Open();

            Boolean skip = false;

            String fn = setup.OutputFilename;

            // String p_m = fold_notes.folder.pathFor(fn.ensureEndsWith("_ranking.xml"), imbSCI.Data.enums.getWritableFileMode.none);



            if (setup.skipIfExisting)
            {
                String f_n = DocumentSelectResult.CheckAndMakeFilename(fn);

                f_n  = executionContextExtra.resourceProvider.GetResourceFile(f_n, fold); // .folder.findFile(f_n, SearchOption.AllDirectories);
                skip = DocumentRankingExtensions.EvaluateSavedDSRanking(f_n, logger, 0.01);
            }

            if (!skip)
            {
                output.context.DeployDataSet(fold, logger);


                //if (!output.context.IsTextRendered)
                //{
                entityOperation.TextRendering(output.context, notes);

                //entityOperation.TextPreblendFilter(output.context, notes);

                //entityOperation.TextBlending(output.context, notes);
                // }

                //if (!output.context.spaceModel.IsModelReady)
                //{

                corpusOperation.SpaceModelPopulation(output.context, notes);

                corpusOperation.SpaceModelCategories(output.context, notes);


                corpusOperation.FeatureSelection(output.context, notes, requirements.MayUseSelectedFeatures);

                corpusOperation.VectorSpaceConstruction(output.context, notes, requirements.MayUseSpaceModelCategories);



                //    }

                logger.log("Document selection computation");


                DocumentSelectResult drmContext = output.context.PrepareContext(ranking, fold_notes.folder, logger);
                drmContext = ranking.ExecuteEvaluation(drmContext, logger);



                foreach (String l in setup.descriptionAppendix)
                {
                    drmContext.description += Environment.NewLine + l;
                }



                fn = DocumentSelectResult.CheckAndMakeFilename(fn);
                fn = executionContextExtra.resourceProvider.SetResourceFilePath(fn, fold);
                // f_n = executionContextExtra.resourceProvider.folder.pathFor(f_n, imbSCI.Data.enums.getWritableFileMode.overwrite, "");
                String xmlModel = objectSerialization.ObjectToXML(drmContext);
                File.WriteAllText(fn, xmlModel);


                //corpusOperation.weightModel.PrepareTheModel(output.context.spaceModel);


                /*
                 * var dataset = corpusOperation.weightModel.SaveModelDataSet();
                 *
                 *
                 * String fn = setup.OutputFilename;
                 * String p_m = notes.folder.pathFor(fn.ensureEndsWith("_model.xml"), imbSCI.Data.enums.getWritableFileMode.autoRenameThis);
                 *
                 * String p_d = notes.folder.pathFor(fn.ensureEndsWith("_data.xml"), imbSCI.Data.enums.getWritableFileMode.autoRenameThis);
                 *
                 *
                 * String xmlModel = objectSerialization.ObjectToXML(setup.corpusMethod.weightModel);
                 *
                 *
                 * String xmlData = objectSerialization.ObjectToXML(dataset);
                 *
                 *
                 * File.WriteAllText(p_m, xmlModel);
                 *
                 * File.WriteAllText(p_d, xmlData);
                 *
                 * /*
                 * corpusOperation.weightModel.saveObjectToXML(
                 *  notes.folder.pathFor(setup.OutputFilename.ensureEndsWith("_model.xml"), imbSCI.Data.enums.getWritableFileMode.autoRenameThis, "Weight model [" + corpusOperation.weightModel.shortName + "]"));
                 *
                 * dataset.saveObjectToXML(notes.folder.pathFor(setup.OutputFilename.ensureEndsWith("_data.xml"), imbSCI.Data.enums.getWritableFileMode.autoRenameThis, "Weight model [" + corpusOperation.weightModel.shortName + "]"));
                 */



                OperationContextReport reportOperation = new OperationContextReport();
                reportOperation.DeploySettingsBase(notes);

                reportOperation.GenerateReports(output.context, setup.reportOptions, notes);
            }

            Close();

            return(output);
        }