/// <summary>
/// Loads the semantic clouds of the specified FVE
/// </summary>
/// <param name="fve">The fve.</param>
/// <param name="logger">The logger.</param>
public void LoadSemanticClouds(semanticFVExtractor fve, ILogBuilder logger)
{
    DirectoryInfo di = folder;
    var dirs = di.GetDirectories(fve.name);

    if (!dirs.Any())
    {
        logger.log("Failed to find subfolder for FVE [" + fve.name + "]");
        return;
    }

    DirectoryInfo firstDir = dirs.First();
    folderNode fveFolder = firstDir;

    var allCloudFiles = fveFolder.findFiles("*Cloud.xml", SearchOption.AllDirectories);

    Int32 loadedCount = 0;

    foreach (String cloudFile in allCloudFiles)
    {
        // "General" and "SharedKnowledge" clouds are not class-specific — skip them
        // NOTE(review): semanticClouds.Add is called with the same key (fve.name) for each
        // file — presumably a project multi-entry collection; verify it is not a plain Dictionary.
        Boolean isSharedCloud = cloudFile.Contains("General") || cloudFile.Contains("SharedKnowledge");
        if (isSharedCloud)
        {
            continue;
        }

        semanticClouds.Add(fve.name, objectSerialization.loadObjectFromXML <lemmaSemanticCloud>(cloudFile, logger));
        loadedCount++;
    }

    logger.log("Semantic clouds loaded [" + loadedCount + "] for " + fve.name);
}
/// <summary>
/// Creates derived experiment models by applying the template variable replacements
/// to the serialized form of the blueprint model.
/// </summary>
/// <param name="model">Experiment setup to derive from; when null, the template blueprint is used.</param>
/// <param name="output">Log output.</param>
/// <returns>The first (master) semantic FVE of the source setup.</returns>
public semanticFVExtractor CreateExperiment(experimentSetup model, ILogBuilder output)
{
    if (model == null)
    {
        model = blueprint;
    }

    semanticFVExtractor fve = model.models.First() as semanticFVExtractor;

    String serializedModel = objectSerialization.ObjectToXML(model);

    foreach (experimentTemplateVariable variable in replacements)
    {
        // BUGFIX: the replacement must be applied BEFORE deserialization — the original
        // captured the XML first and then replaced, so each derived model was built from
        // XML that did not yet contain the current variable's value.
        serializedModel = serializedModel.Replace(variable.needle, variable.replace);

        semanticFVExtractor newModel = objectSerialization.ObjectFromXML <semanticFVExtractor>(serializedModel);
        newModel.name = newModel.GetShortName(name);
        newModel.description = newModel.GetShortDescription();

        output.log("-- created model: " + newModel.name);
        //experiment.featureVectorExtractors_semantic.Add(newModel);
    }

    return(fve);
}
/// <summary>
/// Sets the term-weighting (TW) factors of the FVE's term table constructor.
/// </summary>
/// <param name="fve">The fve.</param>
/// <param name="flag">Weighting preset: "std", "bst" or "off" (case-insensitive).</param>
/// <param name="DFC">Document frequency max factor.</param>
/// <param name="IDFOn">Whether IDF weighting is enabled.</param>
public static void SetTW(this semanticFVExtractor fve, String flag = "std", Double DFC = 1.1, Boolean IDFOn = true)
{
    // FIX: ToLowerInvariant — ToLower() is culture-sensitive (e.g. Turkish dotless-i
    // would make "STD".ToLower() miss the "std" case) — CA1304/CA1308.
    flag = flag.ToLowerInvariant();

    fve.termTableConstructor.settings.documentFrequencyMaxFactor = DFC;
    fve.termTableConstructor.settings.doUseIDF = IDFOn;

    switch (flag)
    {
        case "std":
            fve.termTableConstructor.settings.titleTextFactor = 1;
            fve.termTableConstructor.settings.anchorTextFactor = 0.75;
            fve.termTableConstructor.settings.contentTextFactor = 0.5;
            break;

        case "bst":
            fve.termTableConstructor.settings.titleTextFactor = 10;
            fve.termTableConstructor.settings.anchorTextFactor = 1;
            fve.termTableConstructor.settings.contentTextFactor = 0.1;
            break;

        case "off":
            fve.termTableConstructor.settings.titleTextFactor = 1;
            fve.termTableConstructor.settings.anchorTextFactor = 1;
            fve.termTableConstructor.settings.contentTextFactor = 1;
            break;

        // unknown flags leave the text factors unchanged (original behavior)
    }
}
/// <summary>
/// Builds the default experiment setup: four classifiers (kNN, mSVM, nBayes, bpANN),
/// two semantic FVEs restricted to adjective/noun lemmas, and 5-fold validation.
/// </summary>
/// <returns>The default experiment setup.</returns>
public static experimentSetup GetDefaultExperimentSetup()
{
    experimentSetup setup = new experimentSetup();

    setup.classifiers_settings.Add(new WebPostClassifierSettings(WebPostClassifierType.kNearestNeighbors, "kNN"));
    setup.classifiers_settings.Add(new WebPostClassifierSettings(WebPostClassifierType.multiClassSVM, "mSVM"));
    setup.classifiers_settings.Add(new WebPostClassifierSettings(WebPostClassifierType.naiveBayes, "nBayes"));
    setup.classifiers_settings.Add(new WebPostClassifierSettings(WebPostClassifierType.backPropagationActivationNeuralNetwork, "bpANN"));

    var termExtractor = new semanticFVExtractor();
    var semanticExtractor = new semanticFVExtractor();

    // both extractors share the same lemma-type restriction: adjectives and nouns
    foreach (var extractor in new semanticFVExtractor[] { termExtractor, semanticExtractor })
    {
        extractor.termTableConstructor.settings.allowedLemmaTypes.AddUnique(pos_type.A);
        extractor.termTableConstructor.settings.allowedLemmaTypes.AddUnique(pos_type.N);
        setup.featureVectorExtractors_semantic.Add(extractor);
    }

    setup.validationSetup.name = setup.name;
    setup.validationSetup.k = 5;
    setup.setClassifiers();

    return setup;
}
/// <summary>
/// Builds an experiment template whose replacement variables sweep one setting
/// (selected by <c>option</c>) over the integer range [start, end).
/// </summary>
/// <param name="experiment">Name of the experiment to process.</param>
/// <param name="start">Starting value for the swept setting.</param>
/// <param name="end">Ending value (exclusive) for the swept setting.</param>
/// <param name="name">Short code distinguishing this setup from other experiments.</param>
/// <param name="comment">Comment line for the experiment header.</param>
/// <param name="option">Which setting the template sweeps.</param>
/// <returns>The constructed template with one replacement variable per step.</returns>
public experimentTemplate MakeTemplate([Description("Name of the experiment to process")] experimentSetup experiment, [Description("Starting value for Stx")] Int32 start = 3, [Description("Ending value for Sts")] Int32 end = 8, [Description("3 or 4 letter code indicating how the settings are different then in other experiments")] String name = "", [Description("Comment line for experiment header")] String comment = "", experimentTemplateOption option = experimentTemplateOption.STX)
{
    experimentTemplate output = new experimentTemplate();
    output.DeriveBlueprint(experiment, name);
    output.comment = comment;

    semanticFVExtractor model = experiment.models.First() as semanticFVExtractor;
    experiment.RemoveAllModelsExcept();

    String currentSettingValue = "";
    String elementTag = "";

    switch (option)
    {
        case experimentTemplateOption.LPF:
            currentSettingValue = model.settings.semanticCloudFilter.lowPassFilter.ToString();
            elementTag = "lowPassFilter";
            break;

        case experimentTemplateOption.STX:
            currentSettingValue = model.settings.caseTermExpansionSteps.ToString();
            elementTag = "caseTermExpansionSteps";
            break;

        case experimentTemplateOption.none:
        case experimentTemplateOption.REPEAT:
        case experimentTemplateOption.TC:
            // no sweep target — produces needles/replacements with an empty element tag
            break;
    }

    for (Int32 step = start; step < end; step++)
    {
        experimentTemplateVariable variable = new experimentTemplateVariable();
        variable.needle = "<" + elementTag + ">" + currentSettingValue + "</" + elementTag + ">";
        variable.replace = "<" + elementTag + ">" + step + "</" + elementTag + ">";
        variable.i = step;
        output.replacements.Add(variable);
    }

    return output;
}
// NOTE(review): left byte-identical — the experimentNotes message literal deliberately spans a
// source-line break, and the wiring order matters: the logger must be attached to console output
// before folder creation so folder/notes messages are captured.
// Flow: (1) default + register logger, (2) wire context fields (chunker, setup, tools, classes),
// (3) take the FIRST semantic FVE as master (the _masterExtractor parameter is currently
// ignored except for the commented-out assignment — TODO confirm that is intentional),
// (4) create the experiment folder tree and error/setup note files,
// (5) first pass over setup.models de-duplicates model names by appending "_<count>",
// (6) second pass builds feature vector definitions and k-fold validation cases per model.
/// <summary> /// Sets the execution context. /// </summary> /// <param name="_manager">The manager.</param> /// <param name="_setup">The setup.</param> /// <param name="_tools">The tools.</param> /// <param name="_classes">The classes.</param> /// <param name="sufix">The sufix.</param> /// <param name="chunker">The chunker.</param> /// <param name="_masterExtractor">The master extractor.</param> /// <param name="_logger">The logger.</param> public void SetExecutionContext(experimentManager _manager, experimentSetup _setup, classifierTools _tools, DocumentSetClasses _classes, String sufix, chunkComposerBasic chunker, semanticFVExtractor _masterExtractor, ILogBuilder _logger = null) { if (_logger == null) { _logger = new builderForLog(); aceLog.consoleControl.setAsOutput(_logger, _setup.name); } logger = _logger; chunkComposer = chunker; setup = _setup; tools = _tools; tools.context = this; classes = _classes; // masterConstructor = _masterExtractor.termTableConstructor; masterExtractor = _setup.featureVectorExtractors_semantic.First(); masterConstructor = masterExtractor.termTableConstructor; manager = _manager; String expContextName = "exp_" + setup.name.add(sufix, "_"); folder = manager.folder.Add(expContextName, "Experiment " + setup.name, "Directory with all information on the experiment [" + setup.name + "]"); errorNotesFolder = folder.Add("errors", "Error logs", "Directory with error reports produced if an exception occours. Normally, if everything was ok this folder should have only two files inside: directory_readme.txt and empty: note.txt)."); errorNotes = new experimentNotes(errorNotesFolder, "Notes (logs) about critical and non-critical errors that happen during experiment execution. 
If everything was ok - this file should remain empty"); notes = new experimentNotes(folder, "Notes on experiment setup and execution log"); aceLog.consoleControl.setAsOutput(notes, "Notes"); notes.log("Experiment [" + expContextName + "] initiated"); notes.AppendLine("About: " + setup.description); notes.AppendHorizontalLine(); notes.SaveNote(); notes.AppendHeading("Feature extraction models"); var lnsc = chunkComposer.DescribeSelf(); lnsc.ForEach(x => notes.AppendLine(x)); notes.AppendLine(" - "); List <String> mdn = new List <string>(); foreach (var md in setup.models) { if (mdn.Contains(md.name)) { md.name += "_" + mdn.Count.ToString(); } else { mdn.Add(md.name); } } foreach (var md in setup.models) { String prefix = md.name; md.classes = classes; md.BuildFeatureVectorDefinition(); var lns = md.DescribeSelf(); lns.ForEach(x => notes.AppendLine(x)); kFoldValidationCollection validationCases = classes.BuildValidationCases(prefix, setup.validationSetup.k, tools.DoDebug, logger, folder, setup.validationSetup.randomize); validationCases.pipelineCollection = pipelineCollection; validationCases.connectContext(this, md); validationCollections.Add(md.name, validationCases); //md.postClassifiers = setup.classifiers; } }
/// <summary>
/// Produces the cross k-fold evaluation reports for this validation collection:
/// a per-classifier macro-average row set, optional micro-average table, per-classifier
/// detail tables, and the combined macro/micro summary tables saved to <paramref name="folder"/>.
/// </summary>
/// <param name="context">Execution context providing setup, classes, tools and logger.</param>
/// <param name="folder">Target folder the report files are written into.</param>
// NOTE(review): left byte-identical — several literals and a line-broken "//" comment span the
// original source-line breaks, so any reflow would alter tokens.
// Flow: (1) pivot avgReports into classifier x fold structure and collect the classifier list;
// (2) per classifier: sum per-class eval metrics over all folds (micro), accumulate and divide
//     per-fold report values by fold count (macro), track the highest-F1 classifier
//     (topClassifierReport), and optionally save a per-classifier detail table;
// (3) meanClassifierReport is divided by classifiers.Count at the end — macro over classifiers;
// (4) highlight max-F1 rows and average rows in the all-folds table via row meta styles;
// (5) attach extractor/constructor self-descriptions and save macro (and micro, when
//     DOMAKE_MICROaverage) summary tables with min/max/avg/stdev range rows.
public void MakeReports(experimentExecutionContext context, folderNode folder) { meanClassifierReport = new DocumentSetCaseCollectionReport(extractor.name); aceDictionary2D <IWebPostClassifier, kFoldValidationCase, DocumentSetCaseCollectionReport> tempStructure = new aceDictionary2D <IWebPostClassifier, kFoldValidationCase, DocumentSetCaseCollectionReport>(); DSCCReports firstCase = null; List <IWebPostClassifier> classifiers = new List <IWebPostClassifier>(); foreach (var kFoldCasePair in this) { if (firstCase == null) { firstCase = kFoldCasePair.Value; } foreach (var pair in kFoldCasePair.Value.avgReports) { tempStructure[pair.Key, kFoldCasePair.Key] = pair.Value; if (!classifiers.Contains(pair.Key)) { classifiers.Add(pair.Key); } } } // DataSet dataSet = new DataSet(context.setup.name); // <---------- CREATING AVERAGE TABLE ----------------------------------------------------- var tpAvgMacro = new DataTableTypeExtended <DocumentSetCaseCollectionReport>(context.setup.name + " summary", "Cross k-fold averages measures, fold-level measures are computed by macro-average method"); var tpAvgMicro = new DataTableTypeExtended <DocumentSetCaseCollectionReport>(context.setup.name + " summary", "Cross k-fold averages measures, fold-level measures are computed by micro-average method"); List <DocumentSetCaseCollectionReport> macroaverages = new List <DocumentSetCaseCollectionReport>(); DataTableTypeExtended <DocumentSetCaseCollectionReport> EMperKFolds = new DataTableTypeExtended <DocumentSetCaseCollectionReport>(extractor.name + "_allReports"); foreach (IWebPostClassifier classifier in classifiers) { // < ---- report on each classifier context.logger.log("-- producing report about [" + classifier.name + "]"); //objectTable<DocumentSetCaseCollectionReport> tp = new objectTable<DocumentSetCaseCollectionReport>(nameof(DocumentSetCaseCollectionReport.Name), classifier + "_sum"); DocumentSetCaseCollectionReport avg = new DocumentSetCaseCollectionReport(classifier.name + " 
macro-averaging, k-fold avg. "); DocumentSetCaseCollectionReport rep_eval = new DocumentSetCaseCollectionReport(classifier.name + " micro-averaging, k-fold avg."); rep_eval.Classifier = classifier.name; classificationEvalMetricSet metrics = new classificationEvalMetricSet(); classificationEval eval = new classificationEval(); //eval = metrics[classifier.name]; Int32 c = 0; foreach (KeyValuePair <kFoldValidationCase, DSCCReports> kFoldCasePair in this) { DocumentSetCaseCollectionReport rep = kFoldCasePair.Value.avgReports[classifier]; kFoldValidationCase vCase = kFoldCasePair.Key; classificationEvalMetricSet met = rep.GetSetMetrics(); if (met != null) { foreach (IDocumentSetClass cl in context.classes.GetClasses()) { eval = eval + met[cl.name]; } } rep.Name = classifier.name + "_" + vCase.name; avg.AddValues(rep); EMperKFolds.AddRow(rep); c++; } rep_eval.AddValues(metrics, classificationMetricComputation.microAveraging); avg.Classifier = classifier.name; avg.DivideValues(c); // <<< detecting the best performed classifier in all evaluation folds if (avg.F1measure > highestF1Value) { highestF1Value = avg.F1measure; topClassifierReport = avg; } meanClassifierReport.AddValues(avg); // ----------------- EMperKFolds.AddRow(avg); tpAvgMacro.AddRow(avg); macroaverages.Add(avg); if (DOMAKE_MICROaverage) { tpAvgMicro.AddRow(rep_eval); } // tp.Add(rep_eval); if (context.tools.operation.DoMakeReportForEachClassifier) { DataTable cTable = EMperKFolds; cTable.SetTitle($"{classifier.name} report"); cTable.SetDescription("Summary " + context.setup.validationSetup.k + "-fold validation report for [" + classifier.name + "]"); cTable.SetAdditionalInfoEntry("FV Extractor", extractor.name); cTable.SetAdditionalInfoEntry("Classifier", classifier.name); cTable.SetAdditionalInfoEntry("Class name", classifier.GetType().Name); cTable.SetAdditionalInfoEntry("Correct", rep_eval.Correct); cTable.SetAdditionalInfoEntry("Wrong", rep_eval.Wrong); //cTable.SetAdditionalInfoEntry("Precision", 
rep_eval.Precision); //cTable.SetAdditionalInfoEntry("Recall", rep_eval.Recall); //cTable.SetAdditionalInfoEntry("F1", rep_eval.F1measure); cTable.SetAdditionalInfoEntry("True Positives", metrics[classifier.name].truePositives); cTable.SetAdditionalInfoEntry("False Negatives", metrics[classifier.name].falseNegatives); cTable.SetAdditionalInfoEntry("False Positives", metrics[classifier.name].falsePositives); cTable.AddExtra("Classifier: " + classifier.name + " [" + classifier.GetType().Name + "]"); var info = classifier.DescribeSelf(); info.ForEach(x => cTable.AddExtra(x)); cTable.AddExtra("-----------------------------------------------------------------------"); cTable.AddExtra("Precision, Recall and F1 measures expressed in this table are computed by macroaveraging shema"); // output.CopyRowsFrom(cTable); cTable.GetReportAndSave(folder, appManager.AppInfo, extractor.name + "_classifier_" + classifier.name); // dataSet.AddTable(cTable); } } rangeFinderForDataTable rangerMacro = new rangeFinderForDataTable(tpAvgMacro, "Name"); meanClassifierReport.DivideValues(classifiers.Count); if (macroaverages.Count > 0) { Double maxF1 = macroaverages.Max(x => x.F1measure); Double minF1 = macroaverages.Min(x => x.F1measure); List <String> minCaseNames = macroaverages.Where(x => x.F1measure == minF1).Select(x => x.Name).ToList(); List <String> maxCaseNames = macroaverages.Where(x => x.F1measure == maxF1).Select(x => x.Name).ToList(); var style = EMperKFolds.GetRowMetaSet().SetStyleForRowsWithValue <String>(DataRowInReportTypeEnum.dataHighlightA, nameof(DocumentSetCaseCollectionReport.Name), maxCaseNames); EMperKFolds.GetRowMetaSet().AddUnit(style); // style = tpAvgMacro.GetRowMetaSet().SetStyleForRowsWithValue<String>(DataRowInReportTypeEnum.dataHighlightC, nameof(DocumentSetCaseCollectionReport.Name), minCaseNames); tpAvgMacro.SetAdditionalInfoEntry("FV Extractor", extractor.name); if (DOMAKE_MICROaverage) { tpAvgMicro.SetAdditionalInfoEntry("FV Extractor", extractor.name); } 
List <String> averageNames = macroaverages.Select(x => x.Name).ToList(); var avg_style = EMperKFolds.GetRowMetaSet().SetStyleForRowsWithValue <String>(DataRowInReportTypeEnum.dataHighlightC, nameof(DocumentSetCaseCollectionReport.Name), averageNames); foreach (var x in averageNames) { avg_style.AddMatch(x); } } // ::: ------------------------------------------------------------------------------------------------- ::: --------------------------------------------------------------------- ::: // tpAvgMacro.SetTitle($"{extractor.name} - macroaverage report"); if (DOMAKE_MICROaverage) { tpAvgMicro.SetTitle($"{extractor.name} - microaverage report"); } tpAvgMacro.AddExtra("Complete report on " + context.setup.validationSetup.k + "-fold validation FVE [" + extractor.name + "]"); tpAvgMacro.AddExtra("Fold-level P, R and F1 measures are computed by macroaveraging method, values here are cross k-fold means."); if (DOMAKE_MICROaverage) { tpAvgMicro.AddExtra("Complete " + context.setup.validationSetup.k + "-fold validation report for FVE [" + extractor.name + "]"); } if (DOMAKE_MICROaverage) { tpAvgMicro.AddExtra("Fold-level P, R and F1 measures are computed by microaveraging method, values here are cross k-fold means."); } context.AddExperimentInfo(tpAvgMacro); if (DOMAKE_MICROaverage) { context.AddExperimentInfo(tpAvgMicro); } tpAvgMacro.AddExtra(extractor.description); if (extractor is semanticFVExtractor) { semanticFVExtractor semExtractor = (semanticFVExtractor)extractor; semExtractor.termTableConstructor.DescribeSelf().ForEach(x => tpAvgMacro.AddExtra(x)); semExtractor.CloudConstructor.DescribeSelf().ForEach(x => tpAvgMacro.AddExtra(x)); semExtractor.termTableConstructor.DescribeSelf().ForEach(x => tpAvgMicro.AddExtra(x)); semExtractor.CloudConstructor.DescribeSelf().ForEach(x => tpAvgMicro.AddExtra(x)); } context.logger.log("-- producing summary reports on [" + extractor.name + "]"); rangerMacro.AddRangeRows("Macroaverage ", tpAvgMacro, true, 
imbSCI.Core.math.aggregation.dataPointAggregationType.min | imbSCI.Core.math.aggregation.dataPointAggregationType.max | imbSCI.Core.math.aggregation.dataPointAggregationType.avg | imbSCI.Core.math.aggregation.dataPointAggregationType.stdev); tpAvgMacro.GetReportAndSave(folder, appManager.AppInfo, extractor.name + "_macroaverage_report", true, true); EMperKFolds.AddExtra("The table shows average measures for each fold --- rows marked with colored background show averages for all folds, per classifier."); EMperKFolds.GetReportAndSave(folder, appManager.AppInfo, extractor.name + "_allFolds", true, true); if (DOMAKE_MICROaverage) { tpAvgMicro.GetReportAndSave(folder, appManager.AppInfo, extractor.name + "_microaverage_report", true, true); } //dataSet.GetReportVersion().serializeDataSet(extractor.name + "_classifiers_MultiSheetSummary", folder, imbSCI.Data.enums.reporting.dataTableExportEnum.excel, appManager.AppInfo); }
// NOTE(review): left byte-identical — a long flat mapping from FVE settings into the record's
// report fields, split mid-statement at the source-line break.
// Populated fields: model name; term demotion flags ([P-DEM]/[S-DEM]/[P-MIN]); active feature
// vector names; IDF/DFC; cloud constructor limits (DSF/TCF/PTT), algorithm and HTML tag factors;
// term-category weight triple; redux function code ([1/CF], [1/Sq(CF)] or [OFF]); term expansion
// options abbreviated to 3 letters each; low-pass filter function/frequency; finally FVEHash is an
// MD5 over the serialized FVE and UID combines Path + hash + experiment name.
// NOTE(review): record.FVPType is appended to without being reset here (unlike TermDemotion) —
// repeated calls would accumulate names; confirm whether callers rely on a fresh record.
/// <summary> /// Updates the secondary record. /// </summary> /// <param name="fve">The fve.</param> public void UpdateSecondaryRecord(semanticFVExtractor fve) { //semanticFVExtractor fve = this; var record = this; record.FVEModel = fve.name; //record.FVEHash = fve.get record.TermDemotion = ""; if (fve.settings.semanticCloudFilter.doDemoteAnyRepeatingPrimaryTerm) { record.TermDemotion += "[P-DEM]"; } if (fve.settings.semanticCloudFilter.doDemoteAnyRepeatingSecondaryTerm) { record.TermDemotion += "[S-DEM]"; } if (fve.settings.semanticCloudFilter.doAssignMicroWeightInsteadOfRemoval) { record.TermDemotion += "[P-MIN]"; } foreach (var fv in fve.settings.featureVectors.serialization) { if (fv.isActive) { record.FVPType += fv.name + " "; } } record.IDF = fve.termTableConstructor.settings.doUseIDF; record.DFC = fve.termTableConstructor.settings.documentFrequencyMaxFactor; record.CloudDSF = fve.CloudConstructor.settings.documentSetFreqLowLimit; record.CloudTCF = fve.CloudConstructor.settings.termInChunkLowerLimit; record.CloudPTT = fve.CloudConstructor.settings.primaryTermLowTargetCount; record.CloudAlgorithm = fve.CloudConstructor.settings.algorithm.ToString(); record.HTMLTagFactors = fve.termTableConstructor.settings.titleTextFactor + ":" + fve.termTableConstructor.settings.anchorTextFactor + ":" + fve.termTableConstructor.settings.contentTextFactor; record.TCBOn = fve.CloudConstructor.settings.doFactorToClassClouds; record.TermCategory = fve.CloudConstructor.settings.PrimaryTermWeightFactor + ":" + fve.CloudConstructor.settings.SecondaryTermWeightFactor + ":" + fve.CloudConstructor.settings.ReserveTermWeightFactor; if (fve.settings.semanticCloudFilter.doDivideWeightWithCloudFrequency || fve.settings.semanticCloudFilter.doUseSquareFunctionOfCF) { if (!fve.settings.semanticCloudFilter.doUseSquareFunctionOfCF) { record.ReduxFunction = "[1/CF]"; } else { record.ReduxFunction = "[1/Sq(CF)]"; } } else { record.ReduxFunction = "[OFF]"; } record.TermExpansionOptions = 
fve.settings.caseTermExpansionOptions.ToString(); List <String> ops = record.TermExpansionOptions.SplitSmart(","); List <String> tags = new List <string>(); String teo = ""; foreach (String op in ops) { teo = teo.add(op.imbGetAbbrevation(3, true), ","); } record.TermExpansionOptions = teo; record.StrictPOS = fve.termTableConstructor.settings.strictPosTypePolicy; record.TermExpansion = fve.settings.caseTermExpansionSteps; record.LowpassFreq = fve.settings.semanticCloudFilter.lowPassFilter; if (fve.settings.semanticCloudFilter.isActive) { if (fve.settings.semanticCloudFilter.doCutOffByCloudFrequency) { if (!fve.settings.semanticCloudFilter.doAssignMicroWeightInsteadOfRemoval) { record.LowPassFunction = "[Remove]"; } else { record.LowPassFunction = "[W=mini]"; } } else { record.LowPassFunction = "[OFF]"; } } else { record.LowPassFunction = "[OFF]"; record.LowpassFreq = 0; } String ls = objectSerialization.ObjectToXML(fve); record.FVEHash = md5.GetMd5Hash(ls, false); record.UID = md5.GetMd5Hash(Path + record.FVEHash + record.Experiment, false); }