/// <summary>
/// Resets this matrix: clears both axes, reloads them from the supplied sequences,
/// and fills every (x, y) cell with <paramref name="__init"/>.
/// </summary>
/// <param name="__axisX">Entries for the X axis.</param>
/// <param name="__axisY">Entries for the Y axis.</param>
/// <param name="__init">Initial relation value placed in every cell. For a reference
/// type, note that all cells share this same instance.</param>
protected void deploy(IEnumerable <TXAxis> __axisX, IEnumerable <TYAxis> __axisY, TRelation __init)
{
    axisY.Clear();
    axisX.Clear();
    values = new aceDictionary2D <int, int, TRelation>();

    // reload the Y axis, tracking its size in the class-level counter
    y = 0;
    foreach (TYAxis yEntry in __axisY)
    {
        axisY.Add(yEntry);
        y++;
    }

    // reload the X axis the same way
    x = 0;
    foreach (TXAxis xEntry in __axisX)
    {
        axisX.Add(xEntry);
        x++;
    }

    // seed the full x-by-y grid with the initial relation
    for (int col = 0; col < x; col++)
    {
        for (int row = 0; row < y; row++)
        {
            values[col, row] = __init;
        }
    }
}
/// <summary>
/// Builds a <see cref="DataTable"/> schema that aligns several parallel data sources
/// side by side, keyed ("snapped") by the value of the <paramref name="column_snap"/> property.
/// </summary>
/// <typeparam name="T">Item type of the source collections.</typeparam>
/// <param name="sources">Parallel collections to merge into one table.</param>
/// <param name="column_snap">Name of the property whose value aligns rows across sources.</param>
/// <param name="column_prefix">Name of the property whose value prefixes each source's columns.</param>
/// <param name="column_sideAggregates">Flags selecting side-aggregate column types.</param>
/// <param name="column_toInclude">Names of the properties to include as data columns.</param>
/// <returns>The assembled table — see NOTE at the end: rows are currently not populated.</returns>
public static DataTable GetParallelAggregates <T>(this List <IEnumerable <T> > sources, string column_snap, string column_prefix, dataPointAggregationType column_sideAggregates, params string[] column_toInclude)
{
    settingsEntriesForObject sEO = new settingsEntriesForObject(typeof(T));
    settingsPropertyEntry sPE_snap = sEO.spes[column_snap];
    settingsPropertyEntry sPE_prefix = sEO.spes[column_prefix];

    List <settingsPropertyEntry> sPE_toInclude = new List <settingsPropertyEntry>();
    foreach (string toInclude in column_toInclude)
    {
        sPE_toInclude.Add(sEO.spes[toInclude]);
    }

    List <dataPointAggregationType> side_aggregates = column_sideAggregates.getEnumListFromFlags <dataPointAggregationType>();

    Dictionary <dataPointAggregationType, settingsPropertyEntry> sPE_sideAggregates = new Dictionary <dataPointAggregationType, settingsPropertyEntry>();
    Dictionary <settingsPropertyEntry, dataPointAggregationType> sPE_sideAggregatesContra = new Dictionary <settingsPropertyEntry, dataPointAggregationType>();

    foreach (settingsPropertyEntry toInclude in sPE_toInclude)
    {
        foreach (dataPointAggregationType sideType in side_aggregates)
        {
            settingsPropertyEntry sPE = new settingsPropertyEntry(toInclude.pi);
            sPE.type = typeof(double);
            sPE.name = sPE.name + "_" + sideType.ToString();

            // FIX: the original used .Add(sideType, sPE) here, which throws
            // ArgumentException (duplicate key) as soon as more than one property is
            // included, because the same sideType repeats for every included property.
            // Indexer assignment keeps the last entry instead of crashing.
            sPE_sideAggregates[sideType] = sPE;
            sPE_sideAggregatesContra.Add(sPE, sideType);
        }
    }

    // <---------------------------- preparing data: key each source by its prefix value
    Dictionary <string, IEnumerable <T> > dataByPrefix = new Dictionary <string, IEnumerable <T> >();
    int c = 0;
    foreach (IEnumerable <T> s in sources)
    {
        T firstItem = s.FirstOrDefault <T>();
        if (firstItem != null)
        {
            // falls back to the source ordinal (e.g. "007") when the prefix property is empty
            string prefix = firstItem.imbGetPropertySafe(sPE_prefix.pi).toStringSafe(c.ToString("D3"));
            dataByPrefix.Add(prefix, s);
        }
        c++;
    }

    // <----- DataColumn index
    aceDictionarySet <string, DataColumn> columnsByPrefix = new aceDictionarySet <string, DataColumn>();
    aceDictionarySet <string, DataColumn> columnsSideAggregationByPrefix = new aceDictionarySet <string, DataColumn>();

    // <------------------------- building schema
    DataTable output = new DataTable();
    output.TableName = "ParallelAggregate_by_" + column_snap;

    DataColumn col_recordID = output.Add("ID", "Row ordinal number", "ID", typeof(int), dataPointImportance.normal, "D3").SetUnit("#");
    settingsPropertyEntry sPE_recID = col_recordID.GetSPE();
    DataColumn col_snap = output.Add(sPE_snap);

    aceDictionary2D <settingsPropertyEntry, dataPointAggregationType, DataColumn> columnsByAggregationType = new aceDictionary2D <settingsPropertyEntry, dataPointAggregationType, DataColumn>();
    aceDictionarySet <settingsPropertyEntry, DataColumn> columnsBySource = new aceDictionarySet <settingsPropertyEntry, DataColumn>();

    foreach (settingsPropertyEntry toInclude in sPE_toInclude)
    {
        // one column per (prefix, included property), renamed and grouped by prefix
        foreach (var pair in dataByPrefix)
        {
            DataColumn nColumn = output.Add(toInclude);
            nColumn.ColumnName = pair.Key + "_" + nColumn.ColumnName;
            nColumn.SetGroup(pair.Key);
            columnsByPrefix.Add(pair.Key, nColumn);
            columnsBySource.Add(toInclude, nColumn);
        }

        // NOTE(review): these side-aggregate columns keep the property's default name
        // (not renamed per aggregate type) and are never registered nor populated —
        // looks unfinished; TODO confirm intent.
        foreach (var pair2 in sPE_sideAggregatesContra)
        {
            DataColumn nColumn2 = output.Add(toInclude);
            nColumn2.SetGroup("Aggregate");
            // columnsSideAggregationByPrefix.Add(pair.Key, nColumn);
        }
    }

    // <----------------------------------------------------------- collecting rows
    aceDictionary2D <string, settingsPropertyEntry, object> dataRowBySnapValue = new aceDictionary2D <string, settingsPropertyEntry, object>();
    int riMax = 0;
    foreach (string prefix in dataByPrefix.Keys)
    {
        IEnumerable <T> s = dataByPrefix[prefix];
        int ri = 0;
        foreach (T si in s)
        {
            ri++;
            string snapValue = si.imbGetPropertySafe(sPE_snap.pi).toStringSafe();
            dataRowBySnapValue[snapValue, sPE_snap] = snapValue;
            dataRowBySnapValue[snapValue, sPE_recID] = ri;
            foreach (settingsPropertyEntry toInclude in sPE_toInclude)
            {
                foreach (var pair in columnsByPrefix[prefix])
                {
                    var spe = dataColumnRenderingSetup.GetSPE(pair);
                    dataRowBySnapValue[snapValue, spe] = si.imbGetPropertySafe(spe.pi);
                }
            }
            riMax = Math.Max(ri, riMax);
        }
    }

    // NOTE(review): the values gathered into dataRowBySnapValue above are never written
    // into output.Rows — the original ended with an empty loop over dataByPrefix.Keys
    // (removed here) and commented-out aggregation code, so the returned table carries
    // schema only. TODO confirm whether row population was left unfinished.
    return (output);
}
/// <summary>
/// Gets aggregated versions of the objects: for each aggregation type flagged in
/// <paramref name="type"/>, produces a new <typeparamref name="T"/> instance whose
/// writable numeric properties hold that aggregate (sum, min, max, avg, range,
/// variance, stdev, entropy, count, first/last entry).
/// </summary>
/// <typeparam name="T">Class with public read/write properties to aggregate.</typeparam>
/// <param name="source">The source collection.</param>
/// <param name="type">Flags of aggregation types to compute.</param>
/// <param name="stringKeepLastEntry">When true, string properties of each result instance are copied from the last source item.</param>
/// <returns>Result set with one aggregate instance per aggregation type.</returns>
public static collectionAggregationResult <T> GetAggregates <T>(this IEnumerable <T> source, dataPointAggregationType type = dataPointAggregationType.avg, bool stringKeepLastEntry = true) where T : class, new()
{
    var aggList = type.getEnumListFromFlags <dataPointAggregationType>();

    collectionAggregationResult <T> output = new collectionAggregationResult <T>();
    output.aspect = dataPointAggregationAspect.subSetOfRows;

    // running scalar per (aggregation, property); value lists for distribution-based aggregates
    aceDictionary2D <dataPointAggregationType, PropertyInfo, double> outputData = new aceDictionary2D <dataPointAggregationType, PropertyInfo, double>();
    aceDictionary2D <dataPointAggregationType, PropertyInfo, List <double> > dataCollections = new aceDictionary2D <dataPointAggregationType, PropertyInfo, List <double> >();

    Type t = typeof(T);
    List <PropertyInfo> nominalList = new List <PropertyInfo>();   // string properties, handled separately
    List <PropertyInfo> piList = new List <PropertyInfo>();        // properties to aggregate numerically
    Dictionary <PropertyInfo, settingsPropertyEntry> sPEDict = new Dictionary <PropertyInfo, settingsPropertyEntry>();

    foreach (PropertyInfo pi in t.GetProperties(BindingFlags.Public | BindingFlags.Instance | BindingFlags.SetProperty | BindingFlags.GetProperty))
    {
        settingsPropertyEntry sPE = new settingsPropertyEntry(pi);
        bool ok = true;
        if (!pi.CanWrite)
        {
            ok = false;
        }
        if (ok && pi.PropertyType == typeof(string))
        {
            nominalList.Add(pi);
            ok = false;
        }
        else if (ok && pi.PropertyType == typeof(Enum))
        {
            // NOTE(review): this matches only the abstract System.Enum type itself, never a
            // concrete enum property (that would need pi.PropertyType.IsEnum) — behavior
            // kept as-is; TODO confirm intent.
            ok = false;
        }
        if (ok && sPE.aggregation[dataPointAggregationAspect.subSetOfRows].HasFlag(dataPointAggregationType.hidden))
        {
            ok = false;
        }
        if (ok && sPE.attributes.ContainsKey(imbAttributeName.reporting_hide))
        {
            ok = false;
        }
        if (ok)
        {
            sPEDict.Add(pi, sPE);
            piList.Add(pi);
        }
    }

    // derived aggregates require their input accumulators
    if (aggList.Contains(dataPointAggregationType.avg))
    {
        aggList.AddUnique(dataPointAggregationType.sum);
    }
    if (aggList.Contains(dataPointAggregationType.range))
    {
        aggList.AddUnique(dataPointAggregationType.min);
        aggList.AddUnique(dataPointAggregationType.max);
    }

    foreach (dataPointAggregationType aggType in aggList)
    {
        output.Add(aggType, new T());
        switch (aggType)
        {
            case dataPointAggregationType.var:
            case dataPointAggregationType.stdev:
            case dataPointAggregationType.entropy:
                // distribution-based aggregates need the full value list per property
                foreach (PropertyInfo pi in piList)
                {
                    dataCollections[aggType, pi] = new List <double>();
                }
                break;
        }
    }

    int count = 0;

    // <------------ first pass: accumulate running values over the source
    foreach (T item in source)
    {
        if (output.firstItem == null)
        {
            output.firstItem = item;
        }
        output.lastItem = item;

        foreach (dataPointAggregationType aggType in aggList)
        {
            foreach (PropertyInfo pi in piList)
            {
                double vl = outputData[aggType, pi];
                switch (aggType)
                {
                    case dataPointAggregationType.sum:
                        vl = vl + item.imbGetPropertySafe <double>(pi);
                        break;

                    case dataPointAggregationType.min:
                        // FIX: the accumulator defaults to 0, so the original Math.Min/Max
                        // against it returned 0 as the minimum of all-positive data (and 0
                        // as the maximum of all-negative data); seed from the first item.
                        vl = (count == 0) ? item.imbGetPropertySafe <double>(pi) : Math.Min(item.imbGetPropertySafe <double>(pi), vl);
                        break;

                    case dataPointAggregationType.max:
                        vl = (count == 0) ? item.imbGetPropertySafe <double>(pi) : Math.Max(item.imbGetPropertySafe <double>(pi), vl);
                        break;

                    case dataPointAggregationType.var:
                    case dataPointAggregationType.stdev:
                    case dataPointAggregationType.entropy:
                        dataCollections[aggType, pi].Add(item.imbGetPropertySafe <double>(pi));
                        break;
                }
                outputData[aggType, pi] = vl;
            }
        }
        count++;
    }

    // second pass: derived aggregates computed from the accumulators.
    // NOTE(review): an empty source leaves count == 0 (avg becomes NaN) and
    // firstItem/lastItem null (first/lastEntry would throw) — TODO confirm callers
    // guarantee non-empty input.
    foreach (dataPointAggregationType aggType in aggList)
    {
        foreach (PropertyInfo pi in piList)
        {
            switch (aggType)
            {
                case dataPointAggregationType.count:
                    outputData[aggType, pi] = count;
                    break;

                case dataPointAggregationType.avg:
                    outputData[aggType, pi] = outputData[dataPointAggregationType.sum, pi] / (double)count;
                    break;

                case dataPointAggregationType.range:
                    outputData[aggType, pi] = outputData[dataPointAggregationType.max, pi] - outputData[dataPointAggregationType.min, pi];
                    break;

                case dataPointAggregationType.firstEntry:
                    outputData[aggType, pi] = output.firstItem.imbGetPropertySafe <double>(pi);
                    break;

                case dataPointAggregationType.lastEntry:
                    outputData[aggType, pi] = output.lastItem.imbGetPropertySafe <double>(pi);
                    break;

                case dataPointAggregationType.var:
                    outputData[aggType, pi] = dataCollections[aggType, pi].GetVariance();
                    break;

                case dataPointAggregationType.stdev:
                    outputData[aggType, pi] = dataCollections[aggType, pi].GetStdDeviation();
                    break;

                case dataPointAggregationType.entropy:
                    outputData[aggType, pi] = dataCollections[aggType, pi].GetEntropy();
                    break;
            }
        }
    }

    // write the computed values into the per-aggregation result instances
    foreach (dataPointAggregationType aggType in aggList)
    {
        foreach (PropertyInfo pi in piList)
        {
            output[aggType].imbSetPropertyConvertSafe(pi, outputData[aggType, pi]);
        }
        if (stringKeepLastEntry)
        {
            foreach (PropertyInfo pi in nominalList)
            {
                output[aggType].imbSetPropertyConvertSafe(pi, output.lastItem.imbGetPropertySafe(pi));
            }
        }
    }

    output.Count = count;
    return (output);
}
/// <summary>
/// Builds and saves summary report tables (macro- and micro-averaged, plus per-fold
/// detail) for every classifier evaluated in this k-fold collection, and updates the
/// mean / top classifier reports held on this instance.
/// </summary>
/// <param name="context">The experiment execution context (setup, classes, logger, tools).</param>
/// <param name="folder">Destination folder for the generated report files.</param>
public void MakeReports(experimentExecutionContext context, folderNode folder)
{
    meanClassifierReport = new DocumentSetCaseCollectionReport(extractor.name);

    // classifier x validation fold -> per-fold average report.
    // NOTE(review): tempStructure is populated below but never read in this method.
    aceDictionary2D <IWebPostClassifier, kFoldValidationCase, DocumentSetCaseCollectionReport> tempStructure = new aceDictionary2D <IWebPostClassifier, kFoldValidationCase, DocumentSetCaseCollectionReport>();

    DSCCReports firstCase = null;
    List <IWebPostClassifier> classifiers = new List <IWebPostClassifier>();

    // collect the distinct classifiers seen across all folds
    foreach (var kFoldCasePair in this)
    {
        if (firstCase == null)
        {
            firstCase = kFoldCasePair.Value;
        }
        foreach (var pair in kFoldCasePair.Value.avgReports)
        {
            tempStructure[pair.Key, kFoldCasePair.Key] = pair.Value;
            if (!classifiers.Contains(pair.Key))
            {
                classifiers.Add(pair.Key);
            }
        }
    }

    // DataSet dataSet = new DataSet(context.setup.name);

    // <---------- CREATING AVERAGE TABLE -----------------------------------------------------
    var tpAvgMacro = new DataTableTypeExtended <DocumentSetCaseCollectionReport>(context.setup.name + " summary", "Cross k-fold averages measures, fold-level measures are computed by macro-average method");
    var tpAvgMicro = new DataTableTypeExtended <DocumentSetCaseCollectionReport>(context.setup.name + " summary", "Cross k-fold averages measures, fold-level measures are computed by micro-average method");

    List <DocumentSetCaseCollectionReport> macroaverages = new List <DocumentSetCaseCollectionReport>();
    DataTableTypeExtended <DocumentSetCaseCollectionReport> EMperKFolds = new DataTableTypeExtended <DocumentSetCaseCollectionReport>(extractor.name + "_allReports");

    foreach (IWebPostClassifier classifier in classifiers)
    {
        // < ---- report on each classifier
        context.logger.log("-- producing report about [" + classifier.name + "]");

        //objectTable<DocumentSetCaseCollectionReport> tp = new objectTable<DocumentSetCaseCollectionReport>(nameof(DocumentSetCaseCollectionReport.Name), classifier + "_sum");

        DocumentSetCaseCollectionReport avg = new DocumentSetCaseCollectionReport(classifier.name + " macro-averaging, k-fold avg. ");
        DocumentSetCaseCollectionReport rep_eval = new DocumentSetCaseCollectionReport(classifier.name + " micro-averaging, k-fold avg.");
        rep_eval.Classifier = classifier.name;

        classificationEvalMetricSet metrics = new classificationEvalMetricSet();
        classificationEval eval = new classificationEval();
        //eval = metrics[classifier.name];

        Int32 c = 0;

        // accumulate this classifier's report from every validation fold
        foreach (KeyValuePair <kFoldValidationCase, DSCCReports> kFoldCasePair in this)
        {
            DocumentSetCaseCollectionReport rep = kFoldCasePair.Value.avgReports[classifier];
            kFoldValidationCase vCase = kFoldCasePair.Key;

            classificationEvalMetricSet met = rep.GetSetMetrics();
            if (met != null)
            {
                // fold per-class evaluations into a single accumulator
                foreach (IDocumentSetClass cl in context.classes.GetClasses())
                {
                    eval = eval + met[cl.name];
                }
            }

            rep.Name = classifier.name + "_" + vCase.name;
            avg.AddValues(rep);
            EMperKFolds.AddRow(rep);
            c++;
        }

        // NOTE(review): 'eval' accumulated above is not used after the loop — only the
        // freshly created 'metrics' set is passed here; verify the micro-average inputs.
        rep_eval.AddValues(metrics, classificationMetricComputation.microAveraging);

        avg.Classifier = classifier.name;
        avg.DivideValues(c);   // macro-average: divide the summed fold values by fold count

        // <<< detecting the best performed classifier in all evaluation folds
        if (avg.F1measure > highestF1Value)
        {
            highestF1Value = avg.F1measure;
            topClassifierReport = avg;
        }

        meanClassifierReport.AddValues(avg);

        // -----------------
        EMperKFolds.AddRow(avg);
        tpAvgMacro.AddRow(avg);
        macroaverages.Add(avg);
        if (DOMAKE_MICROaverage)
        {
            tpAvgMicro.AddRow(rep_eval);
        }
        // tp.Add(rep_eval);

        if (context.tools.operation.DoMakeReportForEachClassifier)
        {
            // NOTE(review): cTable aliases EMperKFolds (no copy is made), so each
            // classifier pass re-titles the shared table and saves it with all rows
            // added so far — confirm whether a per-classifier snapshot was intended.
            DataTable cTable = EMperKFolds;
            cTable.SetTitle($"{classifier.name} report");
            cTable.SetDescription("Summary " + context.setup.validationSetup.k + "-fold validation report for [" + classifier.name + "]");
            cTable.SetAdditionalInfoEntry("FV Extractor", extractor.name);
            cTable.SetAdditionalInfoEntry("Classifier", classifier.name);
            cTable.SetAdditionalInfoEntry("Class name", classifier.GetType().Name);
            cTable.SetAdditionalInfoEntry("Correct", rep_eval.Correct);
            cTable.SetAdditionalInfoEntry("Wrong", rep_eval.Wrong);

            //cTable.SetAdditionalInfoEntry("Precision", rep_eval.Precision);
            //cTable.SetAdditionalInfoEntry("Recall", rep_eval.Recall);
            //cTable.SetAdditionalInfoEntry("F1", rep_eval.F1measure);

            cTable.SetAdditionalInfoEntry("True Positives", metrics[classifier.name].truePositives);
            cTable.SetAdditionalInfoEntry("False Negatives", metrics[classifier.name].falseNegatives);
            cTable.SetAdditionalInfoEntry("False Positives", metrics[classifier.name].falsePositives);

            cTable.AddExtra("Classifier: " + classifier.name + " [" + classifier.GetType().Name + "]");

            var info = classifier.DescribeSelf();
            info.ForEach(x => cTable.AddExtra(x));

            cTable.AddExtra("-----------------------------------------------------------------------");
            cTable.AddExtra("Precision, Recall and F1 measures expressed in this table are computed by macroaveraging shema");

            // output.CopyRowsFrom(cTable);
            cTable.GetReportAndSave(folder, appManager.AppInfo, extractor.name + "_classifier_" + classifier.name);

            // dataSet.AddTable(cTable);
        }
    }

    rangeFinderForDataTable rangerMacro = new rangeFinderForDataTable(tpAvgMacro, "Name");

    meanClassifierReport.DivideValues(classifiers.Count);

    if (macroaverages.Count > 0)
    {
        // locate best/worst F1 across the macro-averages for row highlighting
        Double maxF1 = macroaverages.Max(x => x.F1measure);
        Double minF1 = macroaverages.Min(x => x.F1measure);

        List <String> minCaseNames = macroaverages.Where(x => x.F1measure == minF1).Select(x => x.Name).ToList();
        List <String> maxCaseNames = macroaverages.Where(x => x.F1measure == maxF1).Select(x => x.Name).ToList();

        var style = EMperKFolds.GetRowMetaSet().SetStyleForRowsWithValue <String>(DataRowInReportTypeEnum.dataHighlightA, nameof(DocumentSetCaseCollectionReport.Name), maxCaseNames);
        EMperKFolds.GetRowMetaSet().AddUnit(style);

        // style = tpAvgMacro.GetRowMetaSet().SetStyleForRowsWithValue<String>(DataRowInReportTypeEnum.dataHighlightC, nameof(DocumentSetCaseCollectionReport.Name), minCaseNames);

        tpAvgMacro.SetAdditionalInfoEntry("FV Extractor", extractor.name);
        if (DOMAKE_MICROaverage)
        {
            tpAvgMicro.SetAdditionalInfoEntry("FV Extractor", extractor.name);
        }

        // highlight every macro-average row in the all-folds table
        List <String> averageNames = macroaverages.Select(x => x.Name).ToList();
        var avg_style = EMperKFolds.GetRowMetaSet().SetStyleForRowsWithValue <String>(DataRowInReportTypeEnum.dataHighlightC, nameof(DocumentSetCaseCollectionReport.Name), averageNames);
        foreach (var x in averageNames)
        {
            avg_style.AddMatch(x);
        }
    }

    // ::: ------------------------------------------------------------------------------------------------- :::
    // ::: finalize titles, extras and experiment info, then save all reports            ::: //

    tpAvgMacro.SetTitle($"{extractor.name} - macroaverage report");
    if (DOMAKE_MICROaverage)
    {
        tpAvgMicro.SetTitle($"{extractor.name} - microaverage report");
    }

    tpAvgMacro.AddExtra("Complete report on " + context.setup.validationSetup.k + "-fold validation FVE [" + extractor.name + "]");
    tpAvgMacro.AddExtra("Fold-level P, R and F1 measures are computed by macroaveraging method, values here are cross k-fold means.");

    if (DOMAKE_MICROaverage)
    {
        tpAvgMicro.AddExtra("Complete " + context.setup.validationSetup.k + "-fold validation report for FVE [" + extractor.name + "]");
    }
    if (DOMAKE_MICROaverage)
    {
        tpAvgMicro.AddExtra("Fold-level P, R and F1 measures are computed by microaveraging method, values here are cross k-fold means.");
    }

    context.AddExperimentInfo(tpAvgMacro);
    if (DOMAKE_MICROaverage)
    {
        context.AddExperimentInfo(tpAvgMicro);
    }

    tpAvgMacro.AddExtra(extractor.description);

    // semantic extractors additionally describe their term-table and cloud constructors
    if (extractor is semanticFVExtractor)
    {
        semanticFVExtractor semExtractor = (semanticFVExtractor)extractor;
        semExtractor.termTableConstructor.DescribeSelf().ForEach(x => tpAvgMacro.AddExtra(x));
        semExtractor.CloudConstructor.DescribeSelf().ForEach(x => tpAvgMacro.AddExtra(x));
        semExtractor.termTableConstructor.DescribeSelf().ForEach(x => tpAvgMicro.AddExtra(x));
        semExtractor.CloudConstructor.DescribeSelf().ForEach(x => tpAvgMicro.AddExtra(x));
    }

    context.logger.log("-- producing summary reports on [" + extractor.name + "]");

    // append min/max/avg/stdev range rows to the macro-average table, then save reports
    rangerMacro.AddRangeRows("Macroaverage ", tpAvgMacro, true, imbSCI.Core.math.aggregation.dataPointAggregationType.min | imbSCI.Core.math.aggregation.dataPointAggregationType.max | imbSCI.Core.math.aggregation.dataPointAggregationType.avg | imbSCI.Core.math.aggregation.dataPointAggregationType.stdev);

    tpAvgMacro.GetReportAndSave(folder, appManager.AppInfo, extractor.name + "_macroaverage_report", true, true);

    EMperKFolds.AddExtra("The table shows average measures for each fold --- rows marked with colored background show averages for all folds, per classifier.");
    EMperKFolds.GetReportAndSave(folder, appManager.AppInfo, extractor.name + "_allFolds", true, true);

    if (DOMAKE_MICROaverage)
    {
        tpAvgMicro.GetReportAndSave(folder, appManager.AppInfo, extractor.name + "_microaverage_report", true, true);
    }

    //dataSet.GetReportVersion().serializeDataSet(extractor.name + "_classifiers_MultiSheetSummary", folder, imbSCI.Data.enums.reporting.dataTableExportEnum.excel, appManager.AppInfo);
}
/// <summary>
/// Adds a row populated from each column's <see cref="DataColumn.ExtendedProperties"/>
/// value stored under key <paramref name="extra"/> --- at the current position in the
/// table. String values are word-wrapped at <paramref name="widthLimit"/> characters;
/// overflow lines are emitted as additional rows directly below the first.
/// </summary>
/// <param name="dt">The table receiving the row(s).</param>
/// <param name="extra">Key looked up in every column's ExtendedProperties.</param>
/// <param name="widthLimit">The width limit (max characters per wrapped line).</param>
/// <returns>The first (primary) row that was added.</returns>
public static DataRow AddExtraRow(this DataTable dt, Enum extra, int widthLimit = 25)
{
    // overflow text per (wrap-line index, column); SortedDictionary keeps line order stable
    SortedDictionary <int, Dictionary <DataColumn, string> > exLines = new SortedDictionary <int, Dictionary <DataColumn, string> >();

    DataRow dr = dt.NewRow();

    DataTableForStatistics dt_stat = null;
    if (dt is DataTableForStatistics)
    {
        dt_stat = dt as DataTableForStatistics;
    }

    foreach (DataColumn col in dt.Columns)
    {
        if (col.ExtendedProperties.ContainsKey(extra))
        {
            object vl = col.ExtendedProperties[extra];
            if (vl is string)
            {
                string vlstr = vl as string;
                List <string> lines = vlstr.wrapLineBySpace(widthLimit);
                if (lines.Count > 1)
                {
                    // first wrapped line goes into the primary row, the rest are deferred
                    dr[col] = lines[0];
                    for (int i = 1; i < lines.Count; i++)
                    {
                        if (!exLines.ContainsKey(i))
                        {
                            exLines[i] = new Dictionary <DataColumn, string>();
                        }
                        exLines[i][col] = lines[i];
                    }
                }
                else
                {
                    dr[col] = vlstr;
                }
            }
            else
            {
                // FIX: ADO.NET rejects CLR null in a DataRow cell (ArgumentNullException);
                // DBNull.Value is the correct "no value" marker.
                dr[col] = vl ?? DBNull.Value;
            }
        }
        else
        {
            dr[col] = DBNull.Value;   // FIX: was null, which throws on assignment
        }
    }
    dt.Rows.Add(dr);
    // if (dt_stat != null) dt_stat.extraRows.Add(dr);

    // FIX: the original built a single overflow row and let every wrap-line index
    // overwrite its cells, so only the last overflow line survived; emit one row
    // per wrap-line index instead, in order.
    foreach (KeyValuePair <int, Dictionary <DataColumn, string> > ex in exLines)
    {
        DataRow dre = dt.NewRow();
        foreach (DataColumn col in dt.Columns)
        {
            if (ex.Value.ContainsKey(col))
            {
                dre[col] = ex.Value[col];
            }
            else
            {
                dre[col] = DBNull.Value;
            }
        }
        dt.Rows.Add(dre);
        // if (dt_stat != null) dt_stat.extraRows.Add(dre);
    }

    return (dr);
}