/// <summary>
/// Queries the table for the specified terms and returns their aggregated score. The score source is selected via <paramref name="scoreToUse"/> (only numeric columns are supported).
/// </summary>
/// <param name="table">The weight table to query.</param>
/// <param name="queryTerms">Terms to test against the table; only the terms found are used in the calculation. Any score information carried by the query terms themselves is ignored.</param>
/// <param name="scoreToUse">Which numeric property of a matched term to use for aggregation.</param>
/// <param name="aggregation">The aggregation type.</param>
/// <returns>The aggregated score of the matched terms.</returns>
public static double GetScoreForMatch(this IWeightTable table, IEnumerable<string> queryTerms, termTableColumns scoreToUse = termTableColumns.tf_idf, dataPointAggregationType aggregation = dataPointAggregationType.sum)
{
    // GetMatches returns only the query terms that exist in the table.
    List<IWeightTableTerm> output = table.GetMatches(queryTerms);
    return output.GetScoreAggregate(table, scoreToUse, aggregation);
}
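// Usage sketch (not part of the library): shows how GetScoreForMatch composes GetMatches and
// GetScoreAggregate. Assumes using directives for the namespaces declaring IWeightTable,
// termTableColumns and dataPointAggregationType; the query terms below are illustrative only.
public static double ExampleQueryScore(IWeightTable weightTable)
{
    string[] query = { "information", "retrieval" };
    // Average tf-idf over the matched terms instead of the default sum, so the score does not grow with query length.
    return weightTable.GetScoreForMatch(query, termTableColumns.tf_idf, dataPointAggregationType.avg);
}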
/// <summary>
/// Builds an aggregated version of the objects in the collection, one instance of <typeparamref name="T"/> per requested aggregation type.
/// </summary>
/// <typeparam name="T">Item type; must have a parameterless constructor.</typeparam>
/// <param name="source">The source collection.</param>
/// <param name="type">Aggregation types to compute (flags).</param>
/// <param name="stringKeepLastEntry">If set to <c>true</c>, string properties of each aggregate receive the value from the last item of the collection.</param>
/// <returns>Result set holding one aggregate instance per aggregation type, plus the first and last item of the collection.</returns>
public static collectionAggregationResult<T> GetAggregates<T>(this IEnumerable<T> source, dataPointAggregationType type = dataPointAggregationType.avg, bool stringKeepLastEntry = true) where T : class, new()
{
    var aggList = type.getEnumListFromFlags<dataPointAggregationType>();

    collectionAggregationResult<T> output = new collectionAggregationResult<T>();
    output.aspect = dataPointAggregationAspect.subSetOfRows;

    aceDictionary2D<dataPointAggregationType, PropertyInfo, double> outputData = new aceDictionary2D<dataPointAggregationType, PropertyInfo, double>();
    aceDictionary2D<dataPointAggregationType, PropertyInfo, List<double>> dataCollections = new aceDictionary2D<dataPointAggregationType, PropertyInfo, List<double>>();

    Type t = typeof(T);
    List<PropertyInfo> nominalList = new List<PropertyInfo>();
    List<PropertyInfo> piList = new List<PropertyInfo>();
    Dictionary<PropertyInfo, settingsPropertyEntry> sPEDict = new Dictionary<PropertyInfo, settingsPropertyEntry>();

    // Collect writable numeric properties; strings go to the nominal list, enums and hidden/report-excluded properties are skipped.
    foreach (PropertyInfo pi in t.GetProperties(BindingFlags.Public | BindingFlags.Instance | BindingFlags.SetProperty | BindingFlags.GetProperty))
    {
        settingsPropertyEntry sPE = new settingsPropertyEntry(pi);

        bool ok = true;
        if (!pi.CanWrite) ok = false;

        if (ok && pi.PropertyType == typeof(string))
        {
            nominalList.Add(pi);
            ok = false;
        }
        else if (ok && pi.PropertyType == typeof(Enum))
        {
            ok = false;
        }

        if (ok && sPE.aggregation[dataPointAggregationAspect.subSetOfRows].HasFlag(dataPointAggregationType.hidden)) ok = false;
        if (ok && sPE.attributes.ContainsKey(imbAttributeName.reporting_hide)) ok = false;

        if (ok)
        {
            sPEDict.Add(pi, sPE);
            piList.Add(pi);
        }
    }

    // avg needs sum to derive the mean; range needs min and max.
    if (aggList.Contains(dataPointAggregationType.avg)) aggList.AddUnique(dataPointAggregationType.sum);
    if (aggList.Contains(dataPointAggregationType.range))
    {
        aggList.AddUnique(dataPointAggregationType.min);
        aggList.AddUnique(dataPointAggregationType.max);
    }

    foreach (dataPointAggregationType aggType in aggList)
    {
        output.Add(aggType, new T());
        switch (aggType)
        {
            case dataPointAggregationType.var:
            case dataPointAggregationType.stdev:
            case dataPointAggregationType.entropy:
                // These need the full value series, collected during the first pass.
                foreach (PropertyInfo pi in piList) dataCollections[aggType, pi] = new List<double>();
                break;

            case dataPointAggregationType.min:
                // Seed min/max so the first observed value always wins; the dictionary default of 0 would otherwise distort the result.
                foreach (PropertyInfo pi in piList) outputData[aggType, pi] = double.MaxValue;
                break;

            case dataPointAggregationType.max:
                foreach (PropertyInfo pi in piList) outputData[aggType, pi] = double.MinValue;
                break;
        }
    }

    int count = 0;

    // First pass: accumulate sums, extremes and raw value series.
    foreach (T item in source)
    {
        if (output.firstItem == null) output.firstItem = item;
        output.lastItem = item;

        foreach (dataPointAggregationType aggType in aggList)
        {
            foreach (PropertyInfo pi in piList)
            {
                double vl = outputData[aggType, pi];
                switch (aggType)
                {
                    case dataPointAggregationType.sum:
                        vl = vl + item.imbGetPropertySafe<double>(pi);
                        break;

                    case dataPointAggregationType.min:
                        vl = Math.Min(item.imbGetPropertySafe<double>(pi), vl);
                        break;

                    case dataPointAggregationType.max:
                        vl = Math.Max(item.imbGetPropertySafe<double>(pi), vl);
                        break;

                    case dataPointAggregationType.var:
                    case dataPointAggregationType.stdev:
                    case dataPointAggregationType.entropy:
                        dataCollections[aggType, pi].Add(item.imbGetPropertySafe<double>(pi));
                        break;
                }
                outputData[aggType, pi] = vl;
            }
        }
        count++;
    }

    // Second pass: derive aggregates that depend on first-pass results.
    foreach (dataPointAggregationType aggType in aggList)
    {
        foreach (PropertyInfo pi in piList)
        {
            switch (aggType)
            {
                case dataPointAggregationType.count:
                    outputData[aggType, pi] = count;
                    break;

                case dataPointAggregationType.avg:
                    outputData[aggType, pi] = outputData[dataPointAggregationType.sum, pi] / (double)count;
                    break;

                case dataPointAggregationType.range:
                    outputData[aggType, pi] = outputData[dataPointAggregationType.max, pi] - outputData[dataPointAggregationType.min, pi];
                    break;

                case dataPointAggregationType.firstEntry:
                    outputData[aggType, pi] = output.firstItem.imbGetPropertySafe<double>(pi);
                    break;

                case dataPointAggregationType.lastEntry:
                    outputData[aggType, pi] = output.lastItem.imbGetPropertySafe<double>(pi);
                    break;

                case dataPointAggregationType.var:
                    outputData[aggType, pi] = dataCollections[aggType, pi].GetVariance();
                    break;

                case dataPointAggregationType.stdev:
                    outputData[aggType, pi] = dataCollections[aggType, pi].GetStdDeviation();
                    break;

                case dataPointAggregationType.entropy:
                    outputData[aggType, pi] = dataCollections[aggType, pi].GetEntropy();
                    break;
            }
        }
    }

    // Write the computed values back into the per-aggregation instances.
    foreach (dataPointAggregationType aggType in aggList)
    {
        foreach (PropertyInfo pi in piList)
        {
            output[aggType].imbSetPropertyConvertSafe(pi, outputData[aggType, pi]);
        }
        if (stringKeepLastEntry)
        {
            foreach (PropertyInfo pi in nominalList)
            {
                output[aggType].imbSetPropertyConvertSafe(pi, output.lastItem.imbGetPropertySafe(pi));
            }
        }
    }

    output.Count = count;
    return output;
}
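// Usage sketch (not part of the library): aggregating a collection of plain objects with GetAggregates.
// The sampleRow type and its properties are hypothetical; T only needs a parameterless constructor
// and writable numeric properties.
public class sampleRow
{
    public double Duration { get; set; }
    public int Hits { get; set; }
    public string Label { get; set; }
}

public static void ExampleAggregates(IEnumerable<sampleRow> rows)
{
    // Requesting avg implicitly adds sum (needed to derive the mean); range implicitly adds min and max.
    collectionAggregationResult<sampleRow> result = rows.GetAggregates(dataPointAggregationType.avg | dataPointAggregationType.range);

    sampleRow averages = result[dataPointAggregationType.avg]; // per-property arithmetic means
    sampleRow spans = result[dataPointAggregationType.range];  // per-property max - min
    // With stringKeepLastEntry left at true, averages.Label carries the Label of the last row.
}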
/// <summary>
/// Aggregates the selected score column over the given terms, using the specified table as the score source.
/// </summary>
/// <param name="terms">Terms to score.</param>
/// <param name="table">Weight table providing the score values.</param>
/// <param name="scoreToUse">Which numeric property of a term to aggregate.</param>
/// <param name="aggregation">The aggregation type.</param>
/// <returns>The aggregated score, or 0 if no scores were collected.</returns>
public static double GetScoreAggregate(this IEnumerable<IWeightTableTerm> terms, IWeightTable table, termTableColumns scoreToUse = termTableColumns.tf_idf, dataPointAggregationType aggregation = dataPointAggregationType.sum)
{
    List<double> output = new List<double>();
    foreach (IWeightTableTerm term in terms)
    {
        switch (scoreToUse)
        {
            case termTableColumns.cw:
                output.Add(table.GetWeight(term));
                break;

            case termTableColumns.df:
                output.Add(table.GetBDFreq(term));
                break;

            case termTableColumns.freqAbs:
                output.Add(table.GetAFreq(term));
                break;

            case termTableColumns.freqNorm:
                output.Add(table.GetNFreq(term));
                break;

            case termTableColumns.idf:
                output.Add(table.GetIDF(term));
                break;

            case termTableColumns.ncw:
                output.Add(table.GetNWeight(term));
                break;

            case termTableColumns.none:
                break;

            case termTableColumns.words:
            case termTableColumns.normalizedSemanticDistance:
            case termTableColumns.semanticDistance:
            case termTableColumns.termLemma:
            case termTableColumns.termName:
                throw new NotImplementedException();

            case termTableColumns.tf_idf:
                output.Add(table.GetTF_IDF(term));
                break;
        }
    }

    // Guard against an empty result: Average, Min and Max throw on empty sequences.
    if (!output.Any()) return 0;

    switch (aggregation)
    {
        case dataPointAggregationType.avg:
            return output.Average();

        case dataPointAggregationType.count:
            return output.Count;

        case dataPointAggregationType.max:
            return output.Max();

        case dataPointAggregationType.min:
            return output.Min();

        case dataPointAggregationType.range:
            return output.Max() - output.Min();

        case dataPointAggregationType.sum:
            return output.Sum();

        default:
            throw new dataException("Operation not supported [" + aggregation.toString() + "]", null, table, "Aggregation operation not supported");
    }
}
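// Usage sketch (not part of the library): aggregating an explicit term list instead of a raw query.
// The terms parameter is assumed to hold matches obtained elsewhere, e.g. via IWeightTable.GetMatches.
public static double ExampleWeightSpread(IWeightTable table, List<IWeightTableTerm> terms)
{
    // Spread between the strongest and the weakest matched term, measured by cumulative weight (cw).
    return terms.GetScoreAggregate(table, termTableColumns.cw, dataPointAggregationType.range);
}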
/// <summary>
/// Creates a row meta definition for the given row type and aggregation.
/// </summary>
public DataRowMetaDefinition(DataRowInReportTypeEnum __rowType, dataPointAggregationType __aggregation)
{
    rowType = __rowType;
    aggregation = __aggregation;
}
/// <summary>
/// Adds aggregate summary rows to the target table.
/// </summary>
/// <param name="namePrefix">Row name prefix.</param>
/// <param name="targetTable">The target table.</param>
/// <param name="placeDataRowMarks">If set to <c>true</c>, styling conditioners are attached to the rows holding per-column extremes.</param>
/// <param name="rowsToAdd">Flags selecting which aggregate rows to append.</param>
public void AddRangeRows(String namePrefix, DataTable targetTable, Boolean placeDataRowMarks, dataPointAggregationType rowsToAdd = dataPointAggregationType.sum | dataPointAggregationType.avg | dataPointAggregationType.count | dataPointAggregationType.min | dataPointAggregationType.max | dataPointAggregationType.range)
{
    // Feed every numeric cell (skipping the leading rows excluded from learning) into the per-column range finders.
    Int32 i = 0;
    foreach (DataRow dr in targetTable.Rows)
    {
        if (i >= rowsToSkipFromLearning)
        {
            foreach (var pair in finders)
            {
                Object vl = dr[pair.Value.id];
                if (vl is Int32)
                {
                    pair.Value.Learn(Convert.ToDouble((Int32)vl));
                }
                else if (vl is Double)
                {
                    pair.Value.Learn((Double)vl);
                }
                else if (vl is Boolean)
                {
                    pair.Value.Learn(Convert.ToDouble((Boolean)vl));
                }
            }
        }
        i++;
    }

    List<String> rownamesHMax = new List<string>();
    List<String> rownamesHMin = new List<string>();
    List<String> rownamesH3 = new List<string>();

    foreach (dataPointAggregationType dt in rowsToAdd.getEnumListFromFlags())
    {
        DataRow dr = targetTable.NewRow();
        String name = namePrefix + " " + dt.ToString();

        if (keyColumn != null)
        {
            dr[keyColumn.ColumnName] = name;
        }
        if (columnsToSignIn.Any())
        {
            foreach (String cn in columnsToSignIn)
            {
                dr[cn] = name;
            }
        }

        // Remember which existing rows hold a column extreme, so they can be styled later.
        if (placeDataRowMarks && keyColumn != null)
        {
            foreach (DataRow dd in targetTable.Rows)
            {
                foreach (var pair in finders)
                {
                    rangeFinderWithData rf = pair.Value;
                    Double vl = dd[rf.id].imbConvertValueSafeTyped<Double>();
                    switch (dt)
                    {
                        case dataPointAggregationType.max:
                            if (vl == rf.Maximum) rownamesHMax.Add(dd[keyColumnName].toStringSafe());
                            break;

                        case dataPointAggregationType.min:
                            if (vl == rf.Minimum) rownamesHMin.Add(dd[keyColumnName].toStringSafe());
                            break;
                    }
                }
            }
        }

        foreach (var pair in finders)
        {
            rangeFinderWithData rf = pair.Value;
            try
            {
                switch (dt)
                {
                    case dataPointAggregationType.avg:
                        dr[rf.id] = rf.Average;
                        rownamesH3.Add(name);
                        targetTable.SetAdditionalInfoEntry("Prefix: " + dt.ToString(), "Arithmetic mean");
                        break;

                    case dataPointAggregationType.count:
                        dr[rf.id] = rf.Count;
                        targetTable.SetAdditionalInfoEntry("Prefix: " + dt.ToString(), "Number of rows");
                        break;

                    case dataPointAggregationType.max:
                        rownamesHMax.Add(name);
                        if (rf.Maximum > Double.MinValue) dr[rf.id] = rf.Maximum;
                        targetTable.SetAdditionalInfoEntry("Prefix: " + dt.ToString(), "Highest value");
                        break;

                    case dataPointAggregationType.min:
                        rownamesHMin.Add(name);
                        if (rf.Minimum < Double.MaxValue) dr[rf.id] = rf.Minimum;
                        targetTable.SetAdditionalInfoEntry("Prefix: " + dt.ToString(), "Smallest value");
                        break;

                    case dataPointAggregationType.range:
                        dr[rf.id] = rf.Range;
                        targetTable.SetAdditionalInfoEntry("Prefix: " + dt.ToString(), "Range of values");
                        break;

                    case dataPointAggregationType.sum:
                        dr[rf.id] = rf.Sum;
                        rownamesH3.Add(name);
                        targetTable.SetAdditionalInfoEntry("Prefix: " + dt.ToString(), "Sum");
                        break;

                    case dataPointAggregationType.entropy:
                        dr[rf.id] = rf.doubleEntries.GetEntropy(1E-06, true);
                        targetTable.SetAdditionalInfoEntry("Prefix: " + dt.ToString(), "Normalized Entropy");
                        rownamesH3.Add(name);
                        break;

                    case dataPointAggregationType.stdev:
                        dr[rf.id] = rf.doubleEntries.GetStdDeviation();
                        rownamesH3.Add(name);
                        targetTable.SetAdditionalInfoEntry("Prefix: " + dt.ToString(), "Standard Deviation");
                        break;

                    case dataPointAggregationType.var:
                        dr[rf.id] = rf.doubleEntries.GetVariance();
                        rownamesH3.Add(name);
                        targetTable.SetAdditionalInfoEntry("Prefix: " + dt.ToString(), "Variance");
                        break;
                }
            }
            catch (Exception ex)
            {
                targetTable.AddExtra("rangeFinder[" + rf.id + "] failed on [" + dt.ToString() + "] with exception: " + ex.Message);
                targetTable.AddExtra("::: " + ex.StackTrace);
            }
        }
        targetTable.Rows.Add(dr);
    }

    targetTable.GetRowMetaSet().SetStyleForRowsWithValue<String>(DataRowInReportTypeEnum.dataHighlightA, keyColumnName, rownamesHMin);
    targetTable.GetRowMetaSet().SetStyleForRowsWithValue<String>(DataRowInReportTypeEnum.dataHighlightB, keyColumnName, rownamesHMax);
    targetTable.GetRowMetaSet().SetStyleForRowsWithValue<String>(DataRowInReportTypeEnum.dataHighlightC, keyColumnName, rownamesH3);
}
/// <summary>Performs post-processing of data collected by the workload plugin.</summary>
/// <remarks><para>Loads all saved DataTables, groups rows into averages for each measure group, and creates a summary table covering all experiments.</para></remarks>
/// <param name="searchPattern">Pattern used to select input files.</param>
/// <param name="groupColumn">Column name used for row grouping.</param>
/// <seealso cref="aceOperationSetExecutorBase"/>
public void aceOperation_runWorkloadData(
    [Description("pattern used to select input files")] string searchPattern = "results*.xml",
    [Description("column name used for row grouping")] string groupColumn = "measureGroup")
{
    aceOperation_selectFiles(searchPattern, "index\\workload", true);
    folder = folder["index\\workload"];

    List<DataTable> tables = new List<DataTable>();
    dataPointAggregationType aggType = dataPointAggregationType.avg;

    int ci = 1;
    int c = selectedFiles.Count();
    output.log("[" + c + "] DataTable in the queue.");

    List<DataTable> allTables = new List<DataTable>();
    DataSet dSet = new DataSet();
    aceDictionarySet<string, DataTable> byCrawler = new aceDictionarySet<string, DataTable>();
    aceDictionarySet<string, DataTableForStatistics> byCrawlerRT = new aceDictionarySet<string, DataTableForStatistics>();
    DataTableForStatistics rt = null;

    foreach (FileInfo fi in selectedFiles)
    {
        try
        {
            objectTable<reportPlugIn_workloadEntry> workloadEntry = new objectTable<reportPlugIn_workloadEntry>(fi.FullName, true, "EntryID", "");
            objectTable<reportPlugIn_workloadEntry> workloadGrouped = new objectTable<reportPlugIn_workloadEntry>("EntryID", "aggregated");

            // Group entries by the key column, excluding runs flagged with a termination warning.
            aceDictionarySet<int, reportPlugIn_workloadEntry> workloadGroups = workloadEntry.GetGroups<int>(groupColumn, "terminationWarning = 0");

            collectionAggregationResultSet<reportPlugIn_workloadEntry> aggregateSet = new collectionAggregationResultSet<reportPlugIn_workloadEntry>();

            foreach (var set in workloadGroups)
            {
                collectionAggregationResult<reportPlugIn_workloadEntry> aggregates = set.Value.GetAggregates(aggType);

                var aggregate = aggregates[aggType];
                aggregate.measureGroup = set.Key;
                aggregate.EntryID = set.Key.ToString("D5") + "_" + aggType.ToString();
                workloadGrouped.AddOrUpdate(aggregate);
                aggregateSet.Add(aggregate.EntryID + "_" + fi.Name, aggregates);
            }

            string filename = (fi.Name + "_" + groupColumn + "_" + aggType.ToString()).getFilename();
            string n = reportPlugIn_workload_state.ExtractEntryID(aggregateSet.lastItem.EntryID) + dSet.Tables.Count.ToString("D2");

            DataTable dt = workloadGrouped.GetDataTable(dSet, n);
            dt.SetDescription("Collection of [" + aggregateSet.recordType.Name + "] records grouped by [" + groupColumn + "]");
            dt.SetAggregationAspect(dataPointAggregationAspect.subSetOfRows);
            dt.SetAggregationOriginCount(aggregateSet.Count);
            dt.SetAdditionalInfoEntry("Aggregation Type:", aggType);
            dt.SetAdditionalInfoEntry("Data source file:", fi.Name);
            dt.SetAdditionalInfoEntries("Last", aggregateSet.lastItem, typeof(string));
            dt.SetTitle(n);

            byCrawler.Add(aggregateSet.firstItem.Crawler, dt);

            rt = dt.GetReportAndSave(folder, imbWEMManager.authorNotation, n.getFilename(), true);
            byCrawlerRT.Add(aggregateSet.firstItem.Crawler, rt);

            response.AppendLine("[" + ci + " / " + c + "] DataTable [" + fi.Name + "] had [" + workloadGroups.Keys.Count() + "] groups. Result saved as: " + filename);
            ci++;
        }
        catch (Exception ex)
        {
            output.log("[" + ci + " / " + c + "] DataTable [" + fi.FullName + "] failed.");
            output.log(ex.Message);
        }
    }

    output.log("[" + c + "] DataTable processed.");

    dSet.serializeDataSet("workload_all", folder, dataTableExportEnum.excel, imbWEMManager.authorNotation);

    // Per-crawler exports: copy each crawler's tables into its own DataSet, append the report legend, and serialize.
    foreach (string key in byCrawler.Keys)
    {
        string filename = key.getFilename();
        DataSet sd = new DataSet(key);
        foreach (DataTable dti in byCrawler[key])
        {
            sd.AddTable(dti.Copy());
        }
        sd.AddTable(byCrawlerRT[key].First().RenderLegend());
        sd.serializeDataSet(filename, folder, dataTableExportEnum.excel, imbWEMManager.authorNotation);
    }
}
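// Usage sketch (not part of the library): the grouping-and-aggregation core of the operation above, in isolation.
// The path argument and the column names repeat those used by aceOperation_runWorkloadData and are illustrative.
public static void ExampleWorkloadGrouping(string path)
{
    objectTable<reportPlugIn_workloadEntry> entries = new objectTable<reportPlugIn_workloadEntry>(path, true, "EntryID", "");

    // Group rows by measureGroup, excluding runs that raised a termination warning.
    aceDictionarySet<int, reportPlugIn_workloadEntry> groups = entries.GetGroups<int>("measureGroup", "terminationWarning = 0");

    foreach (var group in groups)
    {
        collectionAggregationResult<reportPlugIn_workloadEntry> aggregates = group.Value.GetAggregates(dataPointAggregationType.avg);
        reportPlugIn_workloadEntry mean = aggregates[dataPointAggregationType.avg]; // one averaged entry per measure group
    }
}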
/// <summary>
/// Creates and registers a report column definition for the given source column and aggregation.
/// </summary>
/// <param name="columnType">Role of the column in the report.</param>
/// <param name="column">Source data column.</param>
/// <param name="aggregation">Aggregation applied to the column values.</param>
/// <param name="unit">Optional measurement unit.</param>
/// <returns>The created column definition.</returns>
public DataColumnInReportDefinition Add(DataColumnInReportTypeEnum columnType, DataColumn column, dataPointAggregationType aggregation, string unit = "")
{
    DataColumnInReportDefinition output = new DataColumnInReportDefinition();
    output.columnType = columnType;
    output.aggregation = aggregation;
    output.columnSourceName = column.ColumnName;
    output.columnPriority = column.GetPriority();
    output.format = column.GetFormat();

    Type valueType = typeof(string);
    string letter = "";

    // The aggregation decides the value type of the report column.
    switch (aggregation)
    {
        default:
        case dataPointAggregationType.max:
        case dataPointAggregationType.min:
        case dataPointAggregationType.sum:
        case dataPointAggregationType.firstEntry:
        case dataPointAggregationType.lastEntry:
        case dataPointAggregationType.range:
            valueType = column.DataType;
            break;

        case dataPointAggregationType.avg:
        case dataPointAggregationType.stdev:
        case dataPointAggregationType.var:
        case dataPointAggregationType.entropy:
            valueType = typeof(double);
            if (output.format.isNullOrEmpty()) output.format = "F5";
            break;

        case dataPointAggregationType.count:
            valueType = typeof(int);
            break;
    }

    letter = column.GetLetter();
    if (columnType == DataColumnInReportTypeEnum.dataSummed)
    {
        if (!letter.isNullOrEmpty()) letter = aggregation.ToString() + "(" + letter + ")";
        output.columnLetter = letter;
        output.columnDescription = "(" + aggregation.ToString() + ") of " + column.ColumnName + ". " + column.GetDesc();
    }

    output.columnName = column.ColumnName + " (" + aggregation.ToString() + ")";
    output.importance = column.GetImportance();
    output.columnUnit = column.GetUnit();
    output.columnValueType = valueType;
    output.columnDefault = valueType.GetDefaultValue();
    output.columnGroup = column.GetGroup();
    output.spe = column.GetSPE();

    Add(column.ColumnName, output);
    return output;
}
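// Usage sketch (not part of the library): registering an aggregate column definition.
// The definitions argument stands for an instance of the collection class declaring this Add overload,
// hence the dynamic placeholder; the "Duration" column and "ms" unit are hypothetical.
public static void ExampleColumnDefinition(DataTable sourceTable, dynamic definitions)
{
    DataColumn duration = sourceTable.Columns["Duration"];
    // avg forces a double value type and, when no format is set on the column, the "F5" numeric format.
    DataColumnInReportDefinition def = definitions.Add(DataColumnInReportTypeEnum.dataSummed, duration, dataPointAggregationType.avg, "ms");
}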