/// <summary>
/// Builds a summary DataSet by aggregating tables with the same name across all input DataSets.
/// </summary>
/// <param name="datasets">The DataSets to aggregate.</param>
/// <param name="dataSetName">Name assigned to the resulting DataSet.</param>
/// <returns>A new DataSet containing one aggregated table per table name found in the first input DataSet.</returns>
public static DataSet GetSumDataSet(this IEnumerable<DataSet> datasets, string dataSetName = "dataset")
{
    DataSet dss = null;
    aceDictionarySet<string, DataTable> tableCumul = new aceDictionarySet<string, DataTable>();

    // Group tables from every DataSet by table name, keeping the first DataSet as the schema reference.
    foreach (DataSet ds in datasets)
    {
        if (dss == null) dss = ds;

        foreach (DataTable tb in ds.Tables)
        {
            tableCumul.Add(tb.TableName, tb);
        }
    }

    DataSet fin = new DataSet(dataSetName);

    if (dss == null) return fin; // no input DataSets: return an empty result instead of throwing

    foreach (DataTable tb in dss.Tables)
    {
        fin.AddTable(GetAggregatedTable(tableCumul[tb.TableName], "SumOf_" + tb.TableName, dataPointAggregationAspect.overlapMultiTable));
    }

    return fin;
}
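// Usage sketch for GetSumDataSet (a minimal illustration, assuming the extension
// method above is in scope; the method and argument names here are hypothetical):
public static DataSet CombinePerRunResults(IEnumerable<DataSet> perRunResults)
{
    // Each input DataSet is expected to share the table layout of the first one;
    // the result holds one "SumOf_<tableName>" aggregate per table of that first set.
    return perRunResults.GetSumDataSet("combined_runs");
}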
/// <summary>
/// Builds the EPT data model from a two-dimensional input range.
/// </summary>
/// <param name="dataRange">Used range of the input data.</param>
/// <param name="parameter">Parameter header; its first segment is the name, the rest are field names.</param>
/// <param name="delimiter">Delimiter used to split the parameter column.</param>
/// <param name="parameterColumn">One-based index of the parameter column (the code subtracts one).</param>
/// <param name="valueColumn">One-based index of the value column (the code subtracts one).</param>
/// <param name="prefix">Set to 1 when the range is one-based with a leading row/column to skip.</param>
public void Build(object[,] dataRange, string parameter, char delimiter, int parameterColumn, int valueColumn, int prefix = 0)
{
    Database = new DataSet();

    int indexP = parameterColumn - 1;
    int indexV = valueColumn - 1;

    string[] fields = parameter.Split(delimiter).Skip(1).ToArray(); // skip the name section

    if (prefix == 1) indexP++;

    // prefix == 1 shifts both the start index and the upper bound by one, matching
    // one-based ranges such as those returned by Excel interop.
    for (int i = 0 + prefix; i < dataRange.GetLength(0) + prefix; i++)
    {
        string[] param = dataRange[i, indexP]?.ToString().Split(delimiter);

        if (param?.Length >= 3)
        {
            List<object> values = new List<object>(param.Length);
            values.AddRange(param.Skip(1)); // skip the name section

            if (prefix == 1)
            {
                values.Add(dataRange[i, indexV + 1]);
            }
            else
            {
                values.Add(dataRange[i, indexV]);
            }

            Database.AddTable(param[0], fields, values.ToArray());
        }
    }
}
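// Usage sketch for Build (a minimal illustration; "model" is a hypothetical
// instance of the declaring type, passed as dynamic because that type is not
// shown here). The header below carries one more field than each parameter cell,
// so the last field ("measured") labels the appended value column - an assumption
// based on how Build appends the value after the parameter fields:
public static void BuildExample(dynamic model)
{
    object[,] dataRange = new object[2, 2]
    {
        { "alpha|0.1|0.2", 10 }, // parameter cell: name + two fields, '|'-delimited
        { "beta|0.3|0.4", 20 }   // value cell: the measured value
    };

    // One-based column indices; prefix = 0 because the range has no leading row.
    model.Build(dataRange, "name|fieldA|fieldB|measured", '|', 1, 2);
}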
/// <summary>
/// Gets a copy of the underlying DataTable, optionally registering it with a DataSet.
/// </summary>
/// <param name="dataset">Optional DataSet to which the copy is added.</param>
/// <param name="tableName">Title for the copy; falls back to this instance's name when empty.</param>
/// <returns>A cloned DataTable carrying this instance's metadata and rows.</returns>
public DataTable GetDataTable(DataSet dataset = null, string tableName = "")
{
    DataTable output = table.GetClonedShema<DataTable>();

    output.SetClassName(type.Name);
    output.SetClassType(type);

    if (tableName.isNullOrEmpty()) tableName = name;

    output.SetTitle(tableName);
    output.SetDescription(description);
    output.SetCategoryPriority(settings.CategoryByPriority);
    output.SetAdditionalInfo(table.GetAdditionalInfo());
    output.SetAggregationAspect(dataPointAggregationAspect.none);

    output.CopyRowsFrom(table);

    if (dataset != null)
    {
        dataset.AddTable(output); // register the copy, not the source table
    }

    return output;
}
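// Usage sketch for GetDataTable (a minimal illustration; "source" is a
// hypothetical instance of the declaring type, passed as dynamic because that
// type is not shown here):
public static DataTable AttachSnapshot(DataSet report, dynamic source)
{
    // The method returns a detached copy and registers it with "report"; the
    // source table stays free, since a System.Data.DataTable may belong to only
    // one DataSet at a time.
    return source.GetDataTable(report, "metrics_snapshot");
}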
/// <summary>
/// Builds a clean DataTable with term statistics.
/// </summary>
/// <param name="tableName">Name of the table; falls back to this instance's name when empty.</param>
/// <param name="ds">Optional DataSet to which the table is added.</param>
/// <param name="onlyTermAndFreq">If set to <c>true</c>, only the term columns and absolute frequency are included.</param>
/// <returns>The populated DataTable.</returns>
public DataTable GetDataTableClean(string tableName = "", DataSet ds = null, bool onlyTermAndFreq = false)
{
    if (tableName.isNullOrEmpty()) tableName = name;

    DataTable output = new DataTable();
    output.SetTitle(tableName);

    output.Add(termTableColumns.termName, "Nominal form of the term", "Tn", typeof(string), dataPointImportance.normal);
    output.Add(termTableColumns.termInflections, "Inflected words or otherwise related terms in the same semantic cloud, as CSV", "Ti", typeof(string), dataPointImportance.normal);
    output.Add(termTableColumns.freqAbs, "Absolute frequency - number of occurrences", "T_af", typeof(int), dataPointImportance.normal, "Abs. freq.");

    if (!onlyTermAndFreq)
    {
        output.Add(termTableColumns.freqNorm, "Normalized frequency - abs. frequency divided by the maximum", "T_nf", typeof(double), dataPointImportance.important, "#0.00000");
        output.Add(termTableColumns.df, "Document frequency - number of documents containing the term", "T_df", typeof(int), dataPointImportance.normal);
        output.Add(termTableColumns.idf, "Inverse document frequency - logarithmically normalized T_df", "T_idf", typeof(double), dataPointImportance.normal, "#0.00000");
        output.Add(termTableColumns.tf_idf, "Term frequency - inverse document frequency, calculated as TF-IDF", "T_tf-idf", typeof(double), dataPointImportance.important, "#0.00000");
        output.Add(termTableColumns.cw, "Cumulative weight of term", "T_cw", typeof(double), dataPointImportance.normal, "#0.00000");
        output.Add(termTableColumns.ncw, "Normalized cumulative weight of term", "T_ncw", typeof(double), dataPointImportance.important, "#0.00000");
    }

    foreach (IWeightTableTerm t in terms.Values)
    {
        DataRow dr = output.NewRow();

        dr[nameof(termTableColumns.termName)] = t.name;

        List<string> _all = t.GetAllForms(false);
        dr[nameof(termTableColumns.termInflections)] = _all.toCsvInLine();
        dr[nameof(termTableColumns.freqAbs)] = GetAFreq(t.nominalForm);

        if (!onlyTermAndFreq)
        {
            dr[nameof(termTableColumns.freqNorm)] = GetNFreq(t.nominalForm);
            dr[nameof(termTableColumns.df)] = GetBDFreq(t.nominalForm);
            dr[nameof(termTableColumns.idf)] = GetIDF(t.nominalForm);
            dr[nameof(termTableColumns.tf_idf)] = GetTF_IDF(t.nominalForm);
            dr[nameof(termTableColumns.cw)] = GetWeight(t.nominalForm);
            dr[nameof(termTableColumns.ncw)] = GetNWeight(t.nominalForm);
        }

        output.Rows.Add(dr);
    }

    if (ds != null)
    {
        ds.AddTable(output);
    }

    return output;
}
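// Usage sketch for GetDataTableClean (a minimal illustration; "weightTable" is a
// hypothetical instance of the declaring weight-table type, passed as dynamic
// because that type is not shown here):
public static void ExportTermStatistics(DataSet ds, dynamic weightTable)
{
    // Full statistics: term, inflections, frequencies, TF-IDF and weight columns.
    DataTable full = weightTable.GetDataTableClean("terms_full", ds, false);

    // Compact variant: only the term columns and the absolute frequency.
    DataTable compact = weightTable.GetDataTableClean("terms_compact", ds, true);
}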
/// <summary>Performs post-processing of data collected by the workload plugin.</summary>
/// <remarks><para>Loads all saved DataTables, groups rows into averages for each measure group, and creates a summary table covering all experiments.</para></remarks>
/// <param name="searchPattern">Pattern used to select input files.</param>
/// <param name="groupColumn">Column name used for row grouping.</param>
/// <seealso cref="aceOperationSetExecutorBase"/>
public void aceOperation_runWorkloadData(
    [Description("pattern used to select input files")] string searchPattern = "results*.xml",
    [Description("column name used for row grouping")] string groupColumn = "measureGroup")
{
    aceOperation_selectFiles(searchPattern, "index\\workload", true);
    folder = folder["index\\workload"];

    dataPointAggregationType aggType = dataPointAggregationType.avg;

    int ci = 1;
    int c = selectedFiles.Count();
    output.log("[" + c + "] DataTable in the queue.");

    DataSet dSet = new DataSet();

    aceDictionarySet<string, DataTable> byCrawler = new aceDictionarySet<string, DataTable>();
    aceDictionarySet<string, DataTableForStatistics> byCrawlerRT = new aceDictionarySet<string, DataTableForStatistics>();

    foreach (FileInfo fi in selectedFiles)
    {
        try
        {
            objectTable<reportPlugIn_workloadEntry> workloadEntry = new objectTable<reportPlugIn_workloadEntry>(fi.FullName, true, "EntryID", "");
            objectTable<reportPlugIn_workloadEntry> workloadGrouped = new objectTable<reportPlugIn_workloadEntry>("EntryID", "aggregated");

            aceDictionarySet<int, reportPlugIn_workloadEntry> workloadGroups = workloadEntry.GetGroups<int>(groupColumn, "terminationWarning = 0");
            collectionAggregationResultSet<reportPlugIn_workloadEntry> aggregateSet = new collectionAggregationResultSet<reportPlugIn_workloadEntry>();

            // Aggregate each measure group into a single averaged entry.
            foreach (var set in workloadGroups)
            {
                collectionAggregationResult<reportPlugIn_workloadEntry> aggregates = set.Value.GetAggregates(aggType);

                var aggregate = aggregates[aggType];
                aggregate.measureGroup = set.Key;
                aggregate.EntryID = set.Key.ToString("D5") + "_" + aggType.ToString();

                workloadGrouped.AddOrUpdate(aggregate);
                aggregateSet.Add(aggregate.EntryID + "_" + fi.Name, aggregates);
            }

            string filename = (fi.Name + "_" + groupColumn + "_" + aggType.ToString()).getFilename();
            string n = reportPlugIn_workload_state.ExtractEntryID(aggregateSet.lastItem.EntryID) + dSet.Tables.Count.ToString("D2");

            DataTable dt = workloadGrouped.GetDataTable(dSet, n);
            dt.SetDescription("Collection of [" + aggregateSet.recordType.Name + "] records grouped by [" + groupColumn + "]");
            dt.SetAggregationAspect(dataPointAggregationAspect.subSetOfRows);
            dt.SetAggregationOriginCount(aggregateSet.Count);
            dt.SetAdditionalInfoEntry("Aggregation Type:", aggType);
            dt.SetAdditionalInfoEntry("Data source file:", fi.Name);
            dt.SetAdditionalInfoEntries("Last", aggregateSet.lastItem, typeof(string));
            dt.SetTitle(n);

            byCrawler.Add(aggregateSet.firstItem.Crawler, dt);

            DataTableForStatistics rt = dt.GetReportAndSave(folder, imbWEMManager.authorNotation, n.getFilename(), true);
            byCrawlerRT.Add(aggregateSet.firstItem.Crawler, rt);

            response.AppendLine("[" + ci + " / " + c + "] DataTable [" + fi.Name + "] had [" + workloadGroups.Keys.Count() + "] groups. Result saved as: " + filename);
            ci++;
        }
        catch (Exception ex)
        {
            output.log("[" + ci + " / " + c + "] DataTable [" + fi.FullName + "] failed.");
            output.log(ex.Message);
        }
    }

    output.log("[" + c + "] DataTable processed.");

    dSet.serializeDataSet("workload_all", folder, dataTableExportEnum.excel, imbWEMManager.authorNotation);

    // Export one Excel workbook per crawler, holding all of its tables plus a legend.
    foreach (string key in byCrawler.Keys)
    {
        string filename = key.getFilename();
        DataSet sd = new DataSet(key);

        foreach (DataTable dti in byCrawler[key])
        {
            sd.AddTable(dti.Copy());
        }

        sd.AddTable(byCrawlerRT[key].First().RenderLegend());
        sd.serializeDataSet(filename, folder, dataTableExportEnum.excel, imbWEMManager.authorNotation);
    }
}
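// Usage sketch for aceOperation_runWorkloadData (a minimal illustration; in imbACE,
// operations on an aceOperationSetExecutorBase are normally dispatched from the
// console, so the direct call below is for testing only and "executor" is a
// hypothetical instance of the declaring class):
//
//     executor.aceOperation_runWorkloadData("results*.xml", "measureGroup");
//     // Expected result: "workload_all" plus one Excel workbook per crawler,
//     // saved under the index\workload folder.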