Example #1
0
        /// <summary>
        /// Runs the fold-level analysis pipeline: deploys the dataset, renders entity text,
        /// builds the space model and its categories, performs feature selection and vector
        /// space construction, then emits the reports enabled through <c>setup.tasks</c>
        /// (dataset structure, dataset metrics, term distribution, CWP analytics).
        /// </summary>
        /// <param name="logger">Log builder receiving progress and error messages.</param>
        /// <param name="executionContextMain">Incoming operation context for this fold; may be null.</param>
        /// <param name="executionContextExtra">Experiment-level context; not referenced by this implementation.</param>
        /// <returns>Pair of the current fold and the populated operation context.</returns>
        public override ExperimentDataSetFoldContextPair <OperationContext> Execute(ILogBuilder logger, OperationContext executionContextMain = null, ExperimentModelExecutionContext executionContextExtra = null)
        {
            ExperimentDataSetFoldContextPair <OperationContext> output = new ExperimentDataSetFoldContextPair <OperationContext>(fold, executionContextMain);

            Open();

            // Load the fold's dataset into the working context before any modelling.
            output.context.DeployDataSet(fold, logger);

            entityOperation.TextRendering(output.context, notes, requirements.MayUseTextRender);

            corpusOperation.SpaceModelPopulation(output.context, notes);

            corpusOperation.SpaceModelCategories(output.context, notes);

            // Constructed against the space model before feature selection; executed later,
            // after the vector space is built (see ExecuteAnalysis below).
            FeatureFilterAndWeightModelAnalysis fwmAnalysis = new FeatureFilterAndWeightModelAnalysis(output.context.spaceModel, setup.WeightModels, setup.FilterModels);

            corpusOperation.FeatureSelection(output.context, notes);

            corpusOperation.VectorSpaceConstruction(output.context, notes, true);

            corpusOperation.FeatureVectorConstruction(output.context, notes);

            // Analyze the configured weight/filter models against the populated context.
            fwmAnalysis.ExecuteAnalysis(output.context, logger, fold_notes.folder_feature);

            //if (setup.tasks.HasFlag(CWPAnalysisReportsEnum.reportTermDistribution))
            //{

            //    var model = output.context.spaceModel.categories.GetHeatMapMatrix();
            //    HeatMapRender heatMapRender = new HeatMapRender();
            //    heatMapRender.RenderAndSave(model, fold_notes.folder_feature.pathFor("category_overlap_beforeFS", imbSCI.Data.enums.getWritableFileMode.overwrite, "Heat map showing overlaping terms and their frequencies, before feature selection"));

            //}

            // Optional: dataset structure report, published into the fold folder.
            if (setup.tasks.HasFlag(CWPAnalysisReportsEnum.reportDatasetStructure))
            {
                DatasetStructureReport datasetStructureReport = DatasetStructureReport.MakeStructureReport(fold, fold.name);

                datasetStructureReport.Compute();
                datasetStructureReport.Publish(fold_notes.folder, true, true, true);
            }

            // Optional: content metrics — HTML tags, samples, tokens, plus a summary table.
            if (setup.tasks.HasFlag(CWPAnalysisReportsEnum.reportDatasetMetrics))
            {
                ContentAnalytics contentAnalytics = new ContentAnalytics(fold_notes.folder_entity);

                var Metrics = contentAnalytics.ProduceMetrics(fold.name, fold, output.context, logger);
                Metrics.ReportHTMLTags(fold_notes.folder_entity, fold.name);
                Metrics.ReportSample(fold_notes.folder_entity, fold.name, 1000);
                Metrics.ReportTokens(fold_notes.folder_corpus, fold.name, 1000);
                Metrics.GetDataTable(fold_notes.name).GetReportAndSave(fold_notes.folder_entity, null, "Dataset");
            }

            // Optional: term distribution — category-overlap heat map (table + SVG/JPEG)
            // and per-category term histograms.
            if (setup.tasks.HasFlag(CWPAnalysisReportsEnum.reportTermDistribution))
            {
                imbSCI.Core.math.range.matrix.HeatMapModel model = output.context.spaceModel.categories.GetHeatMapMatrix();

                model.GetDataTable("CategoryFreqOverlap", "Overlaping terms and their frequencies").GetReportAndSave(fold_notes.folder, null, "CategoryOverlap");

                // Rendering can fail (e.g. graphics back-end issues); a failure is logged
                // and the remaining reports still run.
                try
                {
                    HeatMapRender heatMapRender = new HeatMapRender();
                    heatMapRender.style.accronimLength = 3;
                    heatMapRender.style.BaseColor      = Color.Black;
                    heatMapRender.style.fieldHeight    = 50;
                    heatMapRender.style.fieldWidth     = 50;
                    var svg = heatMapRender.Render(model);
                    svg.Save(fold_notes.folder_feature.pathFor("category_overlap_afterFS.svg", imbSCI.Data.enums.getWritableFileMode.overwrite, "Heat map showing overlaping terms and their frequencies"));
                    svg.SaveJPEG(fold_notes.folder_feature.pathFor("category_overlap_afterFS.jpg", imbSCI.Data.enums.getWritableFileMode.overwrite, "Heat map showing overlaping terms and their frequencies"));
                }
                catch (Exception ex)
                {
                    logger.log(ex.Message);
                }

                List <histogramModel> models = new List <histogramModel>();

                foreach (var cat in output.context.spaceModel.categories)
                {
                    var hist = cat.GetHistogram(20);
                    models.Add(hist);
                    DataTable dt_hist = hist.GetDataTableForFrequencies();

                    dt_hist.GetReportAndSave(fold_notes.folder, null, "histogram_table_" + cat.name);

                    // NOTE(review): h_p is computed but the SVG write below is commented out,
                    // so the path is currently unused — confirm whether the chart export should
                    // be re-enabled or this line removed.
                    string h_p = fold_notes.folder_feature.pathFor(cat.name + "_term_distribution.svg", imbSCI.Data.enums.getWritableFileMode.overwrite, "Histogram with term distribution for category [" + cat.name + "]");

                    // File.WriteAllText(h_p, hist.GetSVGChart());
                }

                // Blend the per-category histograms into a single summary report.
                models.BlendHistogramModels(fold.name).GetReportAndSave(fold_notes.folder, null, "histogram_all");
            }

            // Optional: CWP analytics over the space model.
            if (setup.tasks.HasFlag(CWPAnalysisReportsEnum.reportCWPAnalytics))
            {
                analysis.Prepare(output.context.spaceModel, logger);

                analysis.Analysis(fold_notes);
            }

            Close();

            return(output);
        }
        /// <summary>
        /// Builds and persists the feature weight model for the fold: renders text, builds
        /// the space model, selects features, then saves the selected-feature set, the
        /// element weight table and the weight model itself. The whole computation is
        /// skipped when all three output files already exist and <c>setup.skipIfExisting</c>
        /// is set.
        /// </summary>
        /// <param name="logger">Log builder receiving progress messages.</param>
        /// <param name="executionContextMain">Incoming operation context for this fold; may be null.</param>
        /// <param name="executionContextExtra">Experiment-level context; not referenced by this implementation.</param>
        /// <returns>Pair of the current fold and its operation context.</returns>
        public override ExperimentDataSetFoldContextPair <OperationContext> Execute(ILogBuilder logger, OperationContext executionContextMain = null, ExperimentModelExecutionContext executionContextExtra = null)
        {
            ExperimentDataSetFoldContextPair <OperationContext> result = new ExperimentDataSetFoldContextPair <OperationContext>(fold, executionContextMain);

            Open();

            // Paths of the three artifacts this operation produces.
            String modelDefinitionPath = FeatureWeightModel.GetModelDefinitionFilename(setup.OutputFilename, fold_notes.folder);
            String modelDataPath       = FeatureWeightModel.GetModelDataFilename(setup.OutputFilename, fold_notes.folder);
            String weightTablePath     = WeightDictionary.GetDictionaryFilename(setup.OutputFilename, fold_notes.folder);

            // Reuse existing outputs when allowed and all three files are present.
            Boolean alreadyComputed = false;

            if (setup.skipIfExisting && File.Exists(modelDefinitionPath) && File.Exists(modelDataPath) && File.Exists(weightTablePath))
            {
                logger.log("WeightTable [" + modelDataPath + "] found, skipping the operation");
                alreadyComputed = true;
            }

            if (!alreadyComputed)
            {
                result.context.DeployDataSet(fold, logger);

                entityOperation.TextRendering(result.context, notes);

                corpusOperation.SpaceModelPopulation(result.context, notes);
                corpusOperation.SpaceModelCategories(result.context, notes);
                corpusOperation.FeatureSelection(result.context, notes, requirements.MayUseSelectedFeatures);

                // Persist the selected feature set alongside the weight outputs.
                result.context.SelectedFeatures.Save(fold_notes.folder, notes, setup.OutputFilename + "_fs");

                // Train the weight model, then extract and save per-feature factors.
                corpusOperation.weightModel.PrepareTheModel(result.context.spaceModel, logger);

                var weightTable = corpusOperation.weightModel.GetElementFactors(result.context.SelectedFeatures.GetKeys(), result.context.spaceModel);
                weightTable.Save(fold_notes.folder, notes, setup.OutputFilename);

                corpusOperation.weightModel.Save(setup.OutputFilename, fold_notes.folder, notes);

                // Standard context reports.
                OperationContextReport reporting = new OperationContextReport();
                reporting.DeploySettingsBase(notes);
                reporting.GenerateReports(result.context, setup.reportOptions, notes);
            }

            Close();

            return(result);
        }
Example #3
0
        /// <summary>
        /// Builds two renderings of the same fold — a primary and a secondary view — pairs
        /// their documents, ranks the documents and projects the resulting scores onto the
        /// primary view's terms, saving the outcome as a weight dictionary. Skipped entirely
        /// when the output dictionary already exists and <c>setup.skipIfExisting</c> is set.
        /// </summary>
        /// <param name="logger">Log builder receiving progress messages.</param>
        /// <param name="executionContextMain">Incoming operation context for this fold; may be null.</param>
        /// <param name="executionContextExtra">Experiment-level context; not referenced by this implementation.</param>
        /// <returns>Pair of the current fold and the operation context; note the context is
        /// replaced by the secondary view's context during execution.</returns>
        public override ExperimentDataSetFoldContextPair <OperationContext> Execute(ILogBuilder logger, OperationContext executionContextMain = null, ExperimentModelExecutionContext executionContextExtra = null)
        {
            ExperimentDataSetFoldContextPair <OperationContext> output = new ExperimentDataSetFoldContextPair <OperationContext>(fold, executionContextMain);

            Open();

            Boolean skip = false;

            //  String fn = setup.OutputFilename;

            String p_m = WeightDictionary.GetDictionaryFilename(setup.OutputFilename, fold_notes.folder);  //FeatureWeightModel.GetModelDefinitionFilename(setup.OutputFilename, fold_notes.folder);

            //String p_d = FeatureWeightModel.GetModelDataFilename(setup.OutputFilename, fold_notes.folder);

            // Skip the whole computation when the output dictionary file already exists.
            if (setup.skipIfExisting)
            {
                if (File.Exists(p_m))
                {
                    logger.log("WeightTable [" + p_m + "] found, skipping the operation");
                    skip = true;
                }
            }



            if (!skip)
            {
                notes.log("Rendering primary view");

                // ------------------- PRIMARY CONTEXT

                output.context.DeployDataSet(fold, logger);

                primaryEntityOperation.TextRendering(output.context, notes);

                //primaryEntityOperation.TextPreblendFilter(output.context, notes);

                //primaryEntityOperation.TextBlending(output.context, notes);


                corpusOperation.SpaceModelPopulation(output.context, notes);

                corpusOperation.SpaceModelCategories(output.context, notes);

                corpusOperation.FeatureSelection(output.context, notes, requirements.MayUseSelectedFeatures);


                // Keep a handle to the fully-built primary context before replacing it.
                OperationContext primaryContext = output.context;

                // ------------------- SECONDARY CONTEXT

                // A fresh context is deployed over the same fold, rendered with the
                // secondary entity operation; output.context now refers to this view.
                output.context = new OperationContext();

                notes.log("Rendering secondary view");

                output.context.DeployDataSet(fold, logger);

                secondaryEntityOperation.TextRendering(output.context, notes);

                //  secondaryEntityOperation.TextPreblendFilter(output.context, notes);

                // secondaryEntityOperation.TextBlending(output.context, notes);

                corpusOperation.SpaceModelPopulation(output.context, notes);

                corpusOperation.SpaceModelCategories(output.context, notes);

                corpusOperation.FeatureSelection(output.context, notes, requirements.MayUseSelectedFeatures);


                OperationContext secondaryContext = output.context;


                // Pair primary and secondary renderings of the same documents.
                ProjectionDictionary projectionPairs = DocumentRankingTools.ConstructPairDictionary(primaryContext.spaceModel.documents, secondaryContext.spaceModel.documents);

                // Rank documents on the current (secondary) context and persist the scores.
                DocumentSelectResult drmContext = output.context.PrepareContext(rankingOperation, fold_notes.folder, logger);
                drmContext             = rankingOperation.ExecuteEvaluation(drmContext, logger);
                drmContext.description = "Document score assigned to the primary text render" + name;
                drmContext.saveObjectToXML(fold_notes.folder.pathFor("DS_" + name + "_projection_score.xml", imbSCI.Data.enums.getWritableFileMode.overwrite, "Projection within [" + name + "] operation"));

                // Project document scores back onto primary-view terms.
                TokenFrequencyAndScoreDictionary tokenFrequencyAndScoreDictionary = ProjectionTools.ProjectPrimaryTermsToScores(projectionPairs, drmContext, logger);

                WeightDictionary wt = tokenFrequencyAndScoreDictionary.ConstructWeightDictionary();
                wt.name        = setup.OutputFilename;
                wt.description = "Projected PrimaryView to ScoreTable - WeightTable, constructed from [" + projectionPairs.Count + "] render pairs. Document ranking: " + drmContext.description;

                wt.Save(fold_notes.folder, logger, setup.OutputFilename);

                //                wt.saveObjectToXML(p_m);
            }


            Close();

            return(output);
        }
        /// <summary>
        /// Runs classification over the fold: optionally restricts the dataset with a
        /// precompiled document-selection score file, builds the text/space/vector models,
        /// performs classification, evaluates the results against the truth table, and
        /// publishes per-class and averaged evaluation reports.
        /// </summary>
        /// <param name="logger">Log builder receiving progress and error messages.</param>
        /// <param name="executionContextMain">Incoming operation context for this fold; may be null.</param>
        /// <param name="executionContextExtra">Experiment-level context providing the truth table,
        /// resource provider and the collection of test summaries.</param>
        /// <returns>Pair of the current fold and the populated operation context.</returns>
        /// <exception cref="ArgumentException">Thrown when the configured precompiled score file cannot be loaded.</exception>
        public override ExperimentDataSetFoldContextPair <OperationContext> Execute(ILogBuilder logger, OperationContext executionContextMain = null, ExperimentModelExecutionContext executionContextExtra = null)
        {
            ExperimentDataSetFoldContextPair <OperationContext> output = new ExperimentDataSetFoldContextPair <OperationContext>(fold, executionContextMain);

            Open();

            // When a precompiled score file is configured, use it to trim the fold's dataset
            // down to the selected documents before any modelling takes place.
            if (!setup.documentSelectQuery.PrecompiledScoresFilename.Trim().isNullOrEmpty())
            {
                String precompFile = DocumentSelectResult.CheckAndMakeFilename(setup.documentSelectQuery.PrecompiledScoresFilename);

                var p = executionContextExtra.resourceProvider.GetResourceFile(precompFile, fold);

                DocumentSelectResult scores = DocumentSelectResult.LoadFromFile(p, logger);

                if (scores != null)
                {
                    scores.SaveReport(fold_notes.folder.pathFor("DSScores_loaded.txt", imbSCI.Data.enums.getWritableFileMode.overwrite));

                    // Apply the configured size/score limit, then keep only the surviving documents.
                    scores = setup.documentSelectQuery.ExecuteLimit(scores, logger);

                    IEnumerable <string> assignedIDs = scores.items.Select(x => x.AssignedID);

                    scores.SaveReport(fold_notes.folder.pathFor("DSScores_applied.txt", imbSCI.Data.enums.getWritableFileMode.overwrite));

                    fold.DataSetSubSet(assignedIDs.ToList(), true, true);
                }
                else
                {
                    // FIX: this log call previously sat AFTER the throw and was unreachable
                    // (compiler warning CS0162); log first so the failure is recorded before
                    // the exception propagates.
                    logger.log(" _ DocumentSelect failed for [" + name + "]");

                    throw new ArgumentException("DSelection file failed: " + setup.documentSelectQuery.PrecompiledScoresFilename);
                }
            }

            classificationReport tmpReport = new classificationReport();

            String dsReportName = fold.name + setup.documentSelectQuery.PrecompiledScoresFilename + setup.documentSelectQuery.SizeLimit;

            // Dataset structure snapshot, published and folded into the temporary report so
            // its fields can be merged into the final averaged report below.
            DatasetStructureReport dsReport = DatasetStructureReport.MakeStructureReport(fold, dsReportName);

            dsReport.Publish(fold_notes.folder, true, true);

            tmpReport.SetReportDataFields(dsReport);

            // Build the models only when the dataset was not already deployed into this context.
            if (!output.context.IsDatasetDeployed)
            {
                output.context.DeployDataSet(fold, logger);

                entityOperation.TextRendering(output.context, notes, requirements.MayUseTextRender);

                corpusOperation.SpaceModelPopulation(output.context, notes);

                if (requirements.MayUseSpaceModelCategories)
                {
                    corpusOperation.SpaceModelCategories(output.context, notes);
                }
            }

            tmpReport.SetReportDataFields(output.context, false);

            corpusOperation.FeatureSelection(output.context, notes);

            corpusOperation.VectorSpaceConstruction(output.context, notes, requirements.MayUseVectorSpaceCategories);

            corpusOperation.FeatureVectorConstruction(output.context, notes);

            // Optional diagnostic tables built from a random sample of documents.
            if (setup.reportOptions.HasFlag(OperationReportEnum.randomSampledDemo))
            {
                logger.log("-- generating random sample report");
                var data_wm = imbNLP.Toolkit.Reporting.ReportGenerators.MakeWeightModelDemoTable(output.context.spaceModel, corpusOperation.weightModel, output.context.SelectedFeatures, 5, "DemoForWeightModel", "Diagnostic report for picked sample");
                data_wm.GetReportAndSave(fold_notes.folder);
                var data_fs = imbNLP.Toolkit.Reporting.ReportGenerators.MakeWeightModelDemoTable(output.context.spaceModel, corpusOperation.filter.WeightModel, output.context.SelectedFeatures, 5, "DemoForFeatureSelection", "Diagnostic report for feature selection filter sample");
                data_fs.GetReportAndSave(fold_notes.folder);
            }

            classificationOperation.PerformClassification(output.context, executionContextExtra.truthTable, setup.dataSetMode, notes);

            corpusOperation.weightModel.DiagnosticDump(fold_notes.folder, logger);

            // Evaluate the test results against the ground-truth table.
            classificationEvalMetricSet evaluationMetrics = executionContextExtra.truthTable.EvaluateTestResultsToMetricSet(output.context.testResults, setup.OutputFilename + "-" + notes.folder.name, logger);

            // Optionally export the evaluation as a document-selection dictionary, saved both
            // to the fold folder and to the experiment-level notes folder.
            if (setup.ExportEvaluationAsDocumentSelectionResult)
            {
                Toolkit.Feature.FeatureVectorDictionaryWithDimensions dict = executionContextExtra.truthTable.GetEvaluationAsFeatureVectorDictionary(output.context.testResults, setup.OutputFilename, logger, setup.ExportEvaluationCorrectScore, setup.ExportEvaluationIncorrectScore);
                String out_ds = setup.ExportEvaluationToFilename.Replace("*", "");
                dict.Save(fold_notes.folder, out_ds.or(setup.OutputFilename), logger);
                dict.Save(notes.folder, out_ds.or(setup.OutputFilename), logger);
            }

            // Per-class evaluation table with an appended summary row.
            DataTableTypeExtended <classificationEval> inclassEvalTable = new DataTableTypeExtended <classificationEval>("inclass_evaluation", "Test results, per class");

            evaluationMetrics.GetAllEntries().ForEach(x => inclassEvalTable.AddRow(x));
            inclassEvalTable.AddRow(evaluationMetrics.GetSummary("Sum"));
            notes.SaveDataTable(inclassEvalTable, notes.folder_classification);

            // Averaged report: serialized, logged, merged with the dataset-level fields
            // collected into tmpReport above, and added to the experiment summaries.
            classificationReport averagedReport = new classificationReport(evaluationMetrics, setup.averagingMethod);

            averagedReport.Classifier = classificationOperation.classifier.GetSignature();
            averagedReport.saveObjectToXML(notes.folder_classification.pathFor(averagedReport.Name + ".xml", imbSCI.Data.enums.getWritableFileMode.overwrite, "Serialized classification evaluation results summary"));
            averagedReport.ReportToLog(notes);

            averagedReport.SetReportDataFields(output.context, true);
            averagedReport.data.Merge(tmpReport.data);

            averagedReport.SetReportDataFields(classificationOperation.classifier, corpusOperation.filter, corpusOperation.weightModel);

            executionContextExtra.testSummaries.Add(averagedReport);

            // Standard context reports.
            OperationContextReport reportOperation = new OperationContextReport();

            reportOperation.DeploySettingsBase(notes);

            reportOperation.GenerateReports(output.context, setup.reportOptions, notes);

            Close();

            return(output);
        }
Example #5
0
        /// <summary>
        /// Computes and serializes a document-selection ranking for the fold. When
        /// <c>setup.skipIfExisting</c> is set and a previously saved ranking evaluates as
        /// valid, the computation is skipped entirely.
        /// </summary>
        /// <param name="logger">Log builder receiving progress messages.</param>
        /// <param name="executionContextMain">Incoming operation context for this fold; may be null.</param>
        /// <param name="executionContextExtra">Experiment-level context providing the resource provider.</param>
        /// <returns>Pair of the current fold and its operation context.</returns>
        public override ExperimentDataSetFoldContextPair <OperationContext> Execute(ILogBuilder logger, OperationContext executionContextMain = null, ExperimentModelExecutionContext executionContextExtra = null)
        {
            ExperimentDataSetFoldContextPair <OperationContext> result = new ExperimentDataSetFoldContextPair <OperationContext>(fold, executionContextMain);

            Open();

            String outputName = setup.OutputFilename;

            // Reuse an existing ranking file when allowed and when it passes evaluation.
            Boolean rankingAlreadySaved = false;

            if (setup.skipIfExisting)
            {
                String savedPath = DocumentSelectResult.CheckAndMakeFilename(outputName);

                savedPath           = executionContextExtra.resourceProvider.GetResourceFile(savedPath, fold);
                rankingAlreadySaved = DocumentRankingExtensions.EvaluateSavedDSRanking(savedPath, logger, 0.01);
            }

            if (!rankingAlreadySaved)
            {
                result.context.DeployDataSet(fold, logger);

                entityOperation.TextRendering(result.context, notes);

                corpusOperation.SpaceModelPopulation(result.context, notes);
                corpusOperation.SpaceModelCategories(result.context, notes);

                corpusOperation.FeatureSelection(result.context, notes, requirements.MayUseSelectedFeatures);
                corpusOperation.VectorSpaceConstruction(result.context, notes, requirements.MayUseSpaceModelCategories);

                logger.log("Document selection computation");

                // Rank the documents, then append any configured description lines.
                DocumentSelectResult rankingContext = result.context.PrepareContext(ranking, fold_notes.folder, logger);
                rankingContext = ranking.ExecuteEvaluation(rankingContext, logger);

                foreach (String descriptionLine in setup.descriptionAppendix)
                {
                    rankingContext.description += Environment.NewLine + descriptionLine;
                }

                // Serialize the ranking to the fold's resource location.
                outputName = DocumentSelectResult.CheckAndMakeFilename(outputName);
                outputName = executionContextExtra.resourceProvider.SetResourceFilePath(outputName, fold);

                String serializedRanking = objectSerialization.ObjectToXML(rankingContext);
                File.WriteAllText(outputName, serializedRanking);

                // Standard context reports.
                OperationContextReport reportOperation = new OperationContextReport();
                reportOperation.DeploySettingsBase(notes);
                reportOperation.GenerateReports(result.context, setup.reportOptions, notes);
            }

            Close();

            return(result);
        }