Exemple #1
0
        /// <summary>
        /// Loads the semantic clouds stored under the sub-folder named after the specified FVE
        /// and registers each of them in <c>semanticClouds</c> under the FVE name.
        /// </summary>
        /// <remarks>
        /// Files matching <c>*Cloud.xml</c> are searched in all sub-directories. Any file whose path
        /// contains "General" or "SharedKnowledge" is skipped.
        /// </remarks>
        /// <param name="fve">Extractor whose name selects the sub-folder and keys the loaded clouds.</param>
        /// <param name="logger">Receives the outcome messages and is handed to the XML loader.</param>
        public void LoadSemanticClouds(semanticFVExtractor fve, ILogBuilder logger)
        {
            DirectoryInfo root  = folder;
            DirectoryInfo match = root.GetDirectories(fve.name).FirstOrDefault();

            // Nothing to load when the FVE has no sub-folder of its own.
            if (match == null)
            {
                logger.log("Failed to find subfolder for FVE [" + fve.name + "]");
                return;
            }

            folderNode fveFolder = match;
            Int32      loaded    = 0;

            foreach (String path in fveFolder.findFiles("*Cloud.xml", SearchOption.AllDirectories))
            {
                Boolean isExcluded = path.Contains("General") || path.Contains("SharedKnowledge");
                if (isExcluded)
                {
                    continue;
                }

                semanticClouds.Add(fve.name, objectSerialization.loadObjectFromXML <lemmaSemanticCloud>(path, logger));
                loaded++;
            }

            logger.log("Semantic clouds loaded [" + loaded + "] for " + fve.name);
        }
        /// <summary>
        /// Creates one model variant for every entry of <c>replacements</c> by patching the XML
        /// serialization of the template, and logs the name of each created variant.
        /// </summary>
        /// <param name="model">Setup used as the template; when <c>null</c>, <c>blueprint</c> is used instead.</param>
        /// <param name="output">Log that receives one line per created variant.</param>
        /// <returns>
        /// The first model of the template setup, cast to <c>semanticFVExtractor</c>
        /// (<c>null</c> when that model is of a different type).
        /// </returns>
        public semanticFVExtractor CreateExperiment(experimentSetup model, ILogBuilder output)
        {
            if (model == null)
            {
                model = blueprint;
            }

            semanticFVExtractor fve = model.models.First() as semanticFVExtractor;

            // NOTE(review): the XML is produced from the whole experimentSetup but is read back as a
            // semanticFVExtractor below - confirm objectSerialization supports this, or whether `fve`
            // is what should be serialized here.
            String serializedModel = objectSerialization.ObjectToXML(model);

            foreach (experimentTemplateVariable variable in replacements)
            {
                // Every variant is derived from the untouched template. Previously the replacement was
                // applied to the shared template AFTER the copy was taken, so the first variant received
                // no substitution and later needles could no longer match the already-patched template.
                String newModelXML = serializedModel.Replace(variable.needle, variable.replace);

                semanticFVExtractor newModel = objectSerialization.ObjectFromXML <semanticFVExtractor>(newModelXML);
                newModel.name        = newModel.GetShortName(name);
                newModel.description = newModel.GetShortDescription();
                output.log("-- created model: " + newModel.name);

                // The variant is currently only logged; attaching it to an experiment is disabled:
                //experiment.featureVectorExtractors_semantic.Add(newModel);
            }

            return(fve);
        }
Exemple #3
0
        /// <summary>
        /// Applies a term-weighting preset to the term table constructor settings of the extractor.
        /// </summary>
        /// <remarks>
        /// Presets (matched after lower-casing <paramref name="flag"/>), as title / anchor / content text factors:
        /// "std" = 1 / 0.75 / 0.5, "bst" = 10 / 1 / 0.1, "off" = 1 / 1 / 1.
        /// Any other flag leaves the three text factors untouched.
        /// </remarks>
        /// <param name="fve">The extractor whose settings are modified.</param>
        /// <param name="flag">Preset name: "std", "bst" or "off" (case-insensitive).</param>
        /// <param name="DFC">Value assigned to <c>documentFrequencyMaxFactor</c>.</param>
        /// <param name="IDFOn">Value assigned to <c>doUseIDF</c>.</param>
        public static void SetTW(this semanticFVExtractor fve, String flag = "std", Double DFC = 1.1, Boolean IDFOn = true)
        {
            flag = flag.ToLower();

            // These two are applied regardless of the chosen preset.
            fve.termTableConstructor.settings.documentFrequencyMaxFactor = DFC;
            fve.termTableConstructor.settings.doUseIDF = IDFOn;

            if (flag == "std")
            {
                fve.termTableConstructor.settings.titleTextFactor   = 1;
                fve.termTableConstructor.settings.anchorTextFactor  = 0.75;
                fve.termTableConstructor.settings.contentTextFactor = 0.5;
            }
            else if (flag == "bst")
            {
                fve.termTableConstructor.settings.titleTextFactor   = 10;
                fve.termTableConstructor.settings.anchorTextFactor  = 1;
                fve.termTableConstructor.settings.contentTextFactor = 0.1;
            }
            else if (flag == "off")
            {
                fve.termTableConstructor.settings.titleTextFactor   = 1;
                fve.termTableConstructor.settings.anchorTextFactor  = 1;
                fve.termTableConstructor.settings.contentTextFactor = 1;
            }
        }
        /// <summary>
        /// Builds an experiment setup populated with four classifier settings (kNN, mSVM, nBayes, bpANN)
        /// and two semantic feature vector extractors that allow lemma types A and N, validated with k = 5.
        /// </summary>
        /// <returns>The new setup, after <c>setClassifiers()</c> has been called on it.</returns>
        public static experimentSetup GetDefaultExperimentSetup()
        {
            experimentSetup result = new experimentSetup();

            result.classifiers_settings.Add(new WebPostClassifierSettings(WebPostClassifierType.kNearestNeighbors, "kNN"));
            result.classifiers_settings.Add(new WebPostClassifierSettings(WebPostClassifierType.multiClassSVM, "mSVM"));
            result.classifiers_settings.Add(new WebPostClassifierSettings(WebPostClassifierType.naiveBayes, "nBayes"));
            result.classifiers_settings.Add(new WebPostClassifierSettings(WebPostClassifierType.backPropagationActivationNeuralNetwork, "bpANN"));

            // Both extractors are configured identically; they are created and configured first
            // and only then attached to the setup, in creation order.
            var configured = new semanticFVExtractor[2];
            for (Int32 index = 0; index < configured.Length; index++)
            {
                var fvExtractor = new semanticFVExtractor();
                fvExtractor.termTableConstructor.settings.allowedLemmaTypes.AddUnique(pos_type.A);
                fvExtractor.termTableConstructor.settings.allowedLemmaTypes.AddUnique(pos_type.N);
                configured[index] = fvExtractor;
            }

            foreach (var ready in configured)
            {
                result.featureVectorExtractors_semantic.Add(ready);
            }

            result.validationSetup.name = result.name;
            result.validationSetup.k    = 5;

            result.setClassifiers();

            return(result);
        }
        /// <summary>
        /// Derives an experiment template from the given experiment and fills it with one XML
        /// replacement rule per integer in the half-open range [<paramref name="start"/>, <paramref name="end"/>).
        /// </summary>
        /// <remarks>
        /// The XML element targeted by the rules depends on <paramref name="option"/>:
        /// LPF targets <c>lowPassFilter</c>, STX targets <c>caseTermExpansionSteps</c>. For every other
        /// option both the element name and the current value stay empty. All models of the experiment
        /// are removed via <c>RemoveAllModelsExcept()</c> after the first model was picked.
        /// </remarks>
        /// <param name="experiment">Experiment the blueprint is derived from.</param>
        /// <param name="start">First value written into a replacement rule (inclusive).</param>
        /// <param name="end">Upper bound of the values (exclusive).</param>
        /// <param name="name">Name passed to <c>DeriveBlueprint</c>.</param>
        /// <param name="comment">Comment stored on the template.</param>
        /// <param name="option">Selects which setting the replacement rules vary.</param>
        /// <returns>The template with its blueprint, comment and replacement rules set.</returns>
        public experimentTemplate MakeTemplate([Description("Name of the experiment to process")] experimentSetup experiment,
                                               [Description("Starting value for Stx")] Int32 start = 3,
                                               [Description("Ending value for Sts")] Int32 end     = 8,
                                               [Description("3 or 4 letter code indicating how the settings are different then in other experiments")] String name = "",
                                               [Description("Comment line for experiment header")] String comment = "",
                                               experimentTemplateOption option = experimentTemplateOption.STX)
        {
            experimentTemplate template = new experimentTemplate();

            template.DeriveBlueprint(experiment, name);
            template.comment = comment;

            semanticFVExtractor firstModel = experiment.models.First() as semanticFVExtractor;

            experiment.RemoveAllModelsExcept();

            String elementName  = "";
            String presentValue = "";

            if (option == experimentTemplateOption.LPF)
            {
                presentValue = firstModel.settings.semanticCloudFilter.lowPassFilter.ToString();
                elementName  = "lowPassFilter";
            }
            else if (option == experimentTemplateOption.STX)
            {
                presentValue = firstModel.settings.caseTermExpansionSteps.ToString();
                elementName  = "caseTermExpansionSteps";
            }

            // The text to search for is the same for every rule, only the substituted value changes.
            String openTag  = "<" + elementName + ">";
            String closeTag = "</" + elementName + ">";
            String needle   = openTag + presentValue + closeTag;

            for (int step = start; step < end; step++)
            {
                template.replacements.Add(new experimentTemplateVariable
                {
                    needle  = needle,
                    replace = openTag + step + closeTag,
                    i       = step
                });
            }

            return(template);
        }
Exemple #6
0
        /// <summary>
        /// Sets the execution context: stores the collaborating objects, creates the experiment
        /// folders and notes, makes model names unique and builds the k-fold validation cases
        /// for every model of the setup.
        /// </summary>
        /// <param name="_manager">The manager; its folder becomes the parent of the experiment folder.</param>
        /// <param name="_setup">The setup describing models and validation parameters.</param>
        /// <param name="_tools">The tools; their <c>context</c> is set to this instance.</param>
        /// <param name="_classes">The classes assigned to every model and used to build validation cases.</param>
        /// <param name="sufix">The sufix appended (with "_") to the experiment folder name.</param>
        /// <param name="chunker">The chunker, described in the notes.</param>
        /// <param name="_masterExtractor">
        /// Currently not used: the master extractor is taken from the first entry of
        /// <c>_setup.featureVectorExtractors_semantic</c>.
        /// </param>
        /// <param name="_logger">The logger; when <c>null</c> a new <c>builderForLog</c> is created and set as console output.</param>
        public void SetExecutionContext(experimentManager _manager, experimentSetup _setup, classifierTools _tools, DocumentSetClasses _classes, String sufix, chunkComposerBasic chunker, semanticFVExtractor _masterExtractor, ILogBuilder _logger = null)
        {
            if (_logger == null)
            {
                _logger = new builderForLog();
                aceLog.consoleControl.setAsOutput(_logger, _setup.name);
            }
            logger        = _logger;
            chunkComposer = chunker;
            setup         = _setup;
            tools         = _tools;
            tools.context = this;
            classes       = _classes;
            // masterConstructor = _masterExtractor.termTableConstructor;

            masterExtractor   = _setup.featureVectorExtractors_semantic.First();
            masterConstructor = masterExtractor.termTableConstructor;
            manager           = _manager;
            String expContextName = "exp_" + setup.name.add(sufix, "_");

            folder           = manager.folder.Add(expContextName, "Experiment " + setup.name, "Directory with all information on the experiment [" + setup.name + "]");
            errorNotesFolder = folder.Add("errors", "Error logs", "Directory with error reports produced if an exception occours. Normally, if everything was ok this folder should have only two files inside: directory_readme.txt and empty: note.txt).");
            errorNotes       = new experimentNotes(errorNotesFolder, "Notes (logs) about critical and non-critical errors that happen during experiment execution. If everything was ok - this file should remain empty");

            notes = new experimentNotes(folder, "Notes on experiment setup and execution log");
            aceLog.consoleControl.setAsOutput(notes, "Notes");

            notes.log("Experiment [" + expContextName + "] initiated");
            notes.AppendLine("About: " + setup.description);

            notes.AppendHorizontalLine();

            notes.SaveNote();
            notes.AppendHeading("Feature extraction models");

            var lnsc = chunkComposer.DescribeSelf();

            lnsc.ForEach(x => notes.AppendLine(x));
            notes.AppendLine(" - ");

            // Make model names unique. `mdn` holds the names that were kept unchanged (its count is the
            // starting suffix, as before); `takenNames` holds every final name, including generated ones,
            // so a third model with the same name can no longer receive an already used "<name>_<n>".
            List <String>    mdn        = new List <string>();
            HashSet <String> takenNames = new HashSet <String>();

            foreach (var md in setup.models)
            {
                if (takenNames.Add(md.name))
                {
                    mdn.Add(md.name);
                    continue;
                }

                Int32  suffix    = mdn.Count;
                String candidate = md.name + "_" + suffix.ToString();
                while (!takenNames.Add(candidate))
                {
                    suffix++;
                    candidate = md.name + "_" + suffix.ToString();
                }

                md.name = candidate;
            }

            foreach (var md in setup.models)
            {
                String prefix = md.name;
                md.classes = classes;
                md.BuildFeatureVectorDefinition();

                var lns = md.DescribeSelf();
                lns.ForEach(x => notes.AppendLine(x));

                kFoldValidationCollection validationCases = classes.BuildValidationCases(prefix, setup.validationSetup.k, tools.DoDebug, logger, folder, setup.validationSetup.randomize);
                validationCases.pipelineCollection = pipelineCollection;

                validationCases.connectContext(this, md);

                // Keyed by the (now unique) model name.
                validationCollections.Add(md.name, validationCases);

                //md.postClassifiers = setup.classifiers;
            }
        }
Exemple #7
0
        /// <summary>
        /// Produces and saves the k-fold validation reports for the feature vector extractor of this
        /// collection: optionally one report per classifier, a macro-average summary, an all-folds
        /// table and (when <c>DOMAKE_MICROaverage</c> is set) a micro-average summary.
        /// </summary>
        /// <remarks>
        /// Also updates <c>meanClassifierReport</c>, and <c>highestF1Value</c> / <c>topClassifierReport</c>
        /// whenever a classifier average exceeds the currently stored highest F1 value.
        /// </remarks>
        /// <param name="context">Execution context supplying the logger, setup, classes and tool options.</param>
        /// <param name="folder">Folder the reports are saved into.</param>
        public void MakeReports(experimentExecutionContext context, folderNode folder)
        {
            meanClassifierReport = new DocumentSetCaseCollectionReport(extractor.name);

            // NOTE(review): tempStructure is filled below but not read anywhere else in this method.
            aceDictionary2D <IWebPostClassifier, kFoldValidationCase, DocumentSetCaseCollectionReport> tempStructure = new aceDictionary2D <IWebPostClassifier, kFoldValidationCase, DocumentSetCaseCollectionReport>();

            // NOTE(review): firstCase is assigned below but not read anywhere else in this method.
            DSCCReports firstCase = null;
            List <IWebPostClassifier> classifiers = new List <IWebPostClassifier>();

            // Collect the distinct classifiers that appear in the per-fold average reports.
            foreach (var kFoldCasePair in this)
            {
                if (firstCase == null)
                {
                    firstCase = kFoldCasePair.Value;
                }
                foreach (var pair in kFoldCasePair.Value.avgReports)
                {
                    tempStructure[pair.Key, kFoldCasePair.Key] = pair.Value;
                    if (!classifiers.Contains(pair.Key))
                    {
                        classifiers.Add(pair.Key);
                    }
                }
            }



            // DataSet dataSet = new DataSet(context.setup.name);



            // <---------- CREATING AVERAGE TABLE -----------------------------------------------------
            var tpAvgMacro = new DataTableTypeExtended <DocumentSetCaseCollectionReport>(context.setup.name + " summary", "Cross k-fold averages measures, fold-level measures are computed by macro-average method");
            var tpAvgMicro = new DataTableTypeExtended <DocumentSetCaseCollectionReport>(context.setup.name + " summary", "Cross k-fold averages measures, fold-level measures are computed by micro-average method");

            List <DocumentSetCaseCollectionReport> macroaverages = new List <DocumentSetCaseCollectionReport>();
            // Shared across all classifiers: receives one row per fold report and one row per classifier average.
            DataTableTypeExtended <DocumentSetCaseCollectionReport> EMperKFolds = new DataTableTypeExtended <DocumentSetCaseCollectionReport>(extractor.name + "_allReports");


            foreach (IWebPostClassifier classifier in classifiers)
            {
                // < ---- report on each classifier

                context.logger.log("-- producing report about [" + classifier.name + "]");
                //objectTable<DocumentSetCaseCollectionReport> tp = new objectTable<DocumentSetCaseCollectionReport>(nameof(DocumentSetCaseCollectionReport.Name), classifier + "_sum");



                // avg accumulates the fold reports of this classifier and is divided by the fold count below.
                DocumentSetCaseCollectionReport avg = new DocumentSetCaseCollectionReport(classifier.name + " macro-averaging, k-fold avg. ");

                DocumentSetCaseCollectionReport rep_eval = new DocumentSetCaseCollectionReport(classifier.name + " micro-averaging, k-fold avg.");

                rep_eval.Classifier = classifier.name;

                // NOTE(review): no code visible in this method adds values to `metrics`, yet it feeds
                // rep_eval.AddValues(...) and the True/False Positives/Negatives entries below, while
                // `eval` accumulates the per-class metrics but is never read afterwards - confirm intent.
                classificationEvalMetricSet metrics = new classificationEvalMetricSet();
                classificationEval          eval    = new classificationEval();
                //eval = metrics[classifier.name];

                // c counts the folds processed for this classifier.
                Int32 c = 0;
                foreach (KeyValuePair <kFoldValidationCase, DSCCReports> kFoldCasePair in this)
                {
                    DocumentSetCaseCollectionReport rep   = kFoldCasePair.Value.avgReports[classifier];
                    kFoldValidationCase             vCase = kFoldCasePair.Key;


                    classificationEvalMetricSet met = rep.GetSetMetrics();

                    if (met != null)
                    {
                        foreach (IDocumentSetClass cl in context.classes.GetClasses())
                        {
                            eval = eval + met[cl.name];
                        }
                    }

                    // The fold report is renamed to "<classifier>_<fold>" before it is added to the tables.
                    rep.Name = classifier.name + "_" + vCase.name;
                    avg.AddValues(rep);
                    EMperKFolds.AddRow(rep);

                    c++;
                }

                rep_eval.AddValues(metrics, classificationMetricComputation.microAveraging);



                avg.Classifier = classifier.name;
                avg.DivideValues(c);

                // <<< detecting the best performed classifier in all evaluation folds
                if (avg.F1measure > highestF1Value)
                {
                    highestF1Value      = avg.F1measure;
                    topClassifierReport = avg;
                }

                meanClassifierReport.AddValues(avg);


                // -----------------

                EMperKFolds.AddRow(avg);

                tpAvgMacro.AddRow(avg);

                macroaverages.Add(avg);

                if (DOMAKE_MICROaverage)
                {
                    tpAvgMicro.AddRow(rep_eval);
                }
                // tp.Add(rep_eval);

                if (context.tools.operation.DoMakeReportForEachClassifier)
                {
                    // NOTE(review): cTable is obtained from EMperKFolds, which at this point also holds the
                    // rows added for previously processed classifiers - confirm the per-classifier report
                    // is meant to include them.
                    DataTable cTable = EMperKFolds;
                    cTable.SetTitle($"{classifier.name} report");
                    cTable.SetDescription("Summary " + context.setup.validationSetup.k + "-fold validation report for [" + classifier.name + "]");


                    cTable.SetAdditionalInfoEntry("FV Extractor", extractor.name);
                    cTable.SetAdditionalInfoEntry("Classifier", classifier.name);
                    cTable.SetAdditionalInfoEntry("Class name", classifier.GetType().Name);

                    cTable.SetAdditionalInfoEntry("Correct", rep_eval.Correct);
                    cTable.SetAdditionalInfoEntry("Wrong", rep_eval.Wrong);

                    //cTable.SetAdditionalInfoEntry("Precision", rep_eval.Precision);
                    //cTable.SetAdditionalInfoEntry("Recall", rep_eval.Recall);
                    //cTable.SetAdditionalInfoEntry("F1", rep_eval.F1measure);

                    cTable.SetAdditionalInfoEntry("True Positives", metrics[classifier.name].truePositives);
                    cTable.SetAdditionalInfoEntry("False Negatives", metrics[classifier.name].falseNegatives);
                    cTable.SetAdditionalInfoEntry("False Positives", metrics[classifier.name].falsePositives);


                    cTable.AddExtra("Classifier: " + classifier.name + " [" + classifier.GetType().Name + "]");
                    var info = classifier.DescribeSelf();
                    info.ForEach(x => cTable.AddExtra(x));

                    cTable.AddExtra("-----------------------------------------------------------------------");

                    cTable.AddExtra("Precision, Recall and F1 measures expressed in this table are computed by macroaveraging shema");
                    //  output.CopyRowsFrom(cTable);


                    cTable.GetReportAndSave(folder, appManager.AppInfo, extractor.name + "_classifier_" + classifier.name);

                    // dataSet.AddTable(cTable);
                }
            }



            // Created before the range rows are appended to tpAvgMacro further below.
            rangeFinderForDataTable rangerMacro = new rangeFinderForDataTable(tpAvgMacro, "Name");



            // Turns the sum of classifier averages into a mean over all classifiers.
            meanClassifierReport.DivideValues(classifiers.Count);
            if (macroaverages.Count > 0)
            {
                Double maxF1 = macroaverages.Max(x => x.F1measure);
                Double minF1 = macroaverages.Min(x => x.F1measure);

                // NOTE(review): minCaseNames is only referenced by the commented-out styling line below.
                List <String> minCaseNames = macroaverages.Where(x => x.F1measure == minF1).Select(x => x.Name).ToList();
                List <String> maxCaseNames = macroaverages.Where(x => x.F1measure == maxF1).Select(x => x.Name).ToList();


                // Highlight the rows of the classifier average(s) with the highest F1 measure.
                var style = EMperKFolds.GetRowMetaSet().SetStyleForRowsWithValue <String>(DataRowInReportTypeEnum.dataHighlightA, nameof(DocumentSetCaseCollectionReport.Name), maxCaseNames);

                EMperKFolds.GetRowMetaSet().AddUnit(style);


                //  style = tpAvgMacro.GetRowMetaSet().SetStyleForRowsWithValue<String>(DataRowInReportTypeEnum.dataHighlightC, nameof(DocumentSetCaseCollectionReport.Name), minCaseNames);



                tpAvgMacro.SetAdditionalInfoEntry("FV Extractor", extractor.name);
                if (DOMAKE_MICROaverage)
                {
                    tpAvgMicro.SetAdditionalInfoEntry("FV Extractor", extractor.name);
                }


                // Second style: targets every classifier-average row of the all-folds table.
                // NOTE(review): unlike `style` above, avg_style is not passed to AddUnit - confirm whether
                // SetStyleForRowsWithValue already registers it.
                List <String> averageNames = macroaverages.Select(x => x.Name).ToList();
                var           avg_style    = EMperKFolds.GetRowMetaSet().SetStyleForRowsWithValue <String>(DataRowInReportTypeEnum.dataHighlightC, nameof(DocumentSetCaseCollectionReport.Name), averageNames);
                foreach (var x in averageNames)
                {
                    avg_style.AddMatch(x);
                }
            }

            // ::: ------------------------------------------------------------------------------------------------- ::: --------------------------------------------------------------------- ::: //

            tpAvgMacro.SetTitle($"{extractor.name} - macroaverage report");
            if (DOMAKE_MICROaverage)
            {
                tpAvgMicro.SetTitle($"{extractor.name} - microaverage report");
            }

            tpAvgMacro.AddExtra("Complete report on " + context.setup.validationSetup.k + "-fold validation FVE [" + extractor.name + "]");
            tpAvgMacro.AddExtra("Fold-level P, R and F1 measures are computed by macroaveraging method, values here are cross k-fold means.");

            if (DOMAKE_MICROaverage)
            {
                tpAvgMicro.AddExtra("Complete " + context.setup.validationSetup.k + "-fold validation report for FVE [" + extractor.name + "]");
            }
            if (DOMAKE_MICROaverage)
            {
                tpAvgMicro.AddExtra("Fold-level P, R and F1 measures are computed by microaveraging method, values here are cross k-fold means.");
            }

            context.AddExperimentInfo(tpAvgMacro);
            if (DOMAKE_MICROaverage)
            {
                context.AddExperimentInfo(tpAvgMicro);
            }

            tpAvgMacro.AddExtra(extractor.description);


            // For semantic extractors the self-descriptions of the term table and cloud constructors are appended.
            // NOTE(review): the tpAvgMicro lines run even when DOMAKE_MICROaverage is false; that table is
            // only saved when the flag is set.
            if (extractor is semanticFVExtractor)
            {
                semanticFVExtractor semExtractor = (semanticFVExtractor)extractor;

                semExtractor.termTableConstructor.DescribeSelf().ForEach(x => tpAvgMacro.AddExtra(x));
                semExtractor.CloudConstructor.DescribeSelf().ForEach(x => tpAvgMacro.AddExtra(x));
                semExtractor.termTableConstructor.DescribeSelf().ForEach(x => tpAvgMicro.AddExtra(x));
                semExtractor.CloudConstructor.DescribeSelf().ForEach(x => tpAvgMicro.AddExtra(x));
            }

            context.logger.log("-- producing summary reports on [" + extractor.name + "]");

            // Appends min / max / avg / stdev range rows to the macro-average table before it is saved.
            rangerMacro.AddRangeRows("Macroaverage ", tpAvgMacro, true,
                                     imbSCI.Core.math.aggregation.dataPointAggregationType.min | imbSCI.Core.math.aggregation.dataPointAggregationType.max
                                     | imbSCI.Core.math.aggregation.dataPointAggregationType.avg
                                     | imbSCI.Core.math.aggregation.dataPointAggregationType.stdev);
            tpAvgMacro.GetReportAndSave(folder, appManager.AppInfo, extractor.name + "_macroaverage_report", true, true);


            EMperKFolds.AddExtra("The table shows average measures for each fold --- rows marked with colored background show averages for all folds, per classifier.");

            EMperKFolds.GetReportAndSave(folder, appManager.AppInfo, extractor.name + "_allFolds", true, true);

            if (DOMAKE_MICROaverage)
            {
                tpAvgMicro.GetReportAndSave(folder, appManager.AppInfo, extractor.name + "_microaverage_report", true, true);
            }
            //dataSet.GetReportVersion().serializeDataSet(extractor.name + "_classifiers_MultiSheetSummary", folder, imbSCI.Data.enums.reporting.dataTableExportEnum.excel, appManager.AppInfo);
        }
Exemple #8
0
        /// <summary>
        /// Fills the descriptive fields of this record from the settings of the given extractor
        /// and computes the <c>FVEHash</c> and <c>UID</c> values.
        /// </summary>
        /// <remarks>
        /// <c>FVEHash</c> is the MD5 hash of the XML serialization of <paramref name="fve"/>;
        /// <c>UID</c> is the MD5 hash of <c>Path</c>, <c>FVEHash</c> and <c>Experiment</c> concatenated.
        /// <c>FVPType</c> is appended to, not reset.
        /// </remarks>
        /// <param name="fve">The extractor whose settings are recorded.</param>
        public void UpdateSecondaryRecord(semanticFVExtractor fve)
        {
            this.FVEModel = fve.name;

            // --- term demotion tags
            this.TermDemotion = "";
            var cloudFilter = fve.settings.semanticCloudFilter;

            if (cloudFilter.doDemoteAnyRepeatingPrimaryTerm)
            {
                this.TermDemotion += "[P-DEM]";
            }
            if (cloudFilter.doDemoteAnyRepeatingSecondaryTerm)
            {
                this.TermDemotion += "[S-DEM]";
            }
            if (cloudFilter.doAssignMicroWeightInsteadOfRemoval)
            {
                this.TermDemotion += "[P-MIN]";
            }

            // --- names of the active feature vectors, space separated
            foreach (var vector in fve.settings.featureVectors.serialization)
            {
                if (!vector.isActive)
                {
                    continue;
                }

                this.FVPType += vector.name + " ";
            }

            // --- term table and cloud constructor settings
            var tableSettings = fve.termTableConstructor.settings;
            this.IDF = tableSettings.doUseIDF;
            this.DFC = tableSettings.documentFrequencyMaxFactor;

            var cloudSettings = fve.CloudConstructor.settings;
            this.CloudDSF = cloudSettings.documentSetFreqLowLimit;
            this.CloudTCF = cloudSettings.termInChunkLowerLimit;
            this.CloudPTT = cloudSettings.primaryTermLowTargetCount;

            this.CloudAlgorithm = cloudSettings.algorithm.ToString();

            this.HTMLTagFactors = tableSettings.titleTextFactor + ":" + tableSettings.anchorTextFactor + ":" + tableSettings.contentTextFactor;
            this.TCBOn          = cloudSettings.doFactorToClassClouds;
            this.TermCategory   = cloudSettings.PrimaryTermWeightFactor + ":" + cloudSettings.SecondaryTermWeightFactor + ":" + cloudSettings.ReserveTermWeightFactor;

            // --- weight reduction function: the square variant takes precedence over plain division
            if (cloudFilter.doUseSquareFunctionOfCF)
            {
                this.ReduxFunction = "[1/Sq(CF)]";
            }
            else if (cloudFilter.doDivideWeightWithCloudFrequency)
            {
                this.ReduxFunction = "[1/CF]";
            }
            else
            {
                this.ReduxFunction = "[OFF]";
            }

            // --- term expansion options: each comma separated option is abbreviated and re-joined with ","
            this.TermExpansionOptions = fve.settings.caseTermExpansionOptions.ToString();

            String abbreviated = "";
            foreach (String option in this.TermExpansionOptions.SplitSmart(","))
            {
                abbreviated = abbreviated.add(option.imbGetAbbrevation(3, true), ",");
            }

            this.TermExpansionOptions = abbreviated;
            this.StrictPOS            = tableSettings.strictPosTypePolicy;

            this.TermExpansion = fve.settings.caseTermExpansionSteps;
            this.LowpassFreq   = cloudFilter.lowPassFilter;

            // --- low pass function: reported as OFF unless the filter is active and cuts off by cloud frequency
            Boolean cutOffEnabled = cloudFilter.isActive && cloudFilter.doCutOffByCloudFrequency;
            if (!cutOffEnabled)
            {
                this.LowPassFunction = "[OFF]";
            }
            else if (cloudFilter.doAssignMicroWeightInsteadOfRemoval)
            {
                this.LowPassFunction = "[W=mini]";
            }
            else
            {
                this.LowPassFunction = "[Remove]";
            }

            // An inactive filter also resets the recorded frequency.
            if (!cloudFilter.isActive)
            {
                this.LowpassFreq = 0;
            }

            // --- identity hashes
            String serialized = objectSerialization.ObjectToXML(fve);

            this.FVEHash = md5.GetMd5Hash(serialized, false);

            this.UID = md5.GetMd5Hash(Path + this.FVEHash + this.Experiment, false);
        }