示例#1
0
        /// <summary>
        /// Aggregates a collection of <see cref="DataSet"/>s into one DataSet with a single summary table per table name.
        /// </summary>
        /// <param name="datasets">The datasets to aggregate; tables are matched across datasets by <see cref="DataTable.TableName"/>.</param>
        /// <param name="dataSetName">Name assigned to the resulting data set.</param>
        /// <returns>A new <see cref="DataSet"/> containing one aggregated ("SumOf_") table per distinct table name of the first dataset; empty when <paramref name="datasets"/> is empty.</returns>
        public static DataSet GetSumDataSet(this IEnumerable <DataSet> datasets, string dataSetName = "dataset")
        {
            DataSet dss = null;

            // All tables from all datasets, bucketed by table name
            aceDictionarySet <string, DataTable> tableCumul = new aceDictionarySet <string, DataTable>();

            foreach (DataSet ds in datasets)
            {
                if (dss == null)
                {
                    dss = ds; // the first dataset defines the table layout of the result
                }

                foreach (DataTable tb in ds.Tables)
                {
                    tableCumul.Add(tb.TableName, tb);
                }
            }

            DataSet fin = new DataSet(dataSetName);

            // Guard: with no input datasets there is nothing to aggregate
            // (previously this fell through and dereferenced a null dss)
            if (dss == null)
            {
                return fin;
            }

            foreach (DataTable tb in dss.Tables)
            {
                fin.AddTable(GetAggregatedTable(tableCumul[tb.TableName], "SumOf_" + tb.TableName, dataPointAggregationAspect.overlapMultiTable));
            }

            return fin;
        }
示例#2
0
        /// <summary>
        /// Builds the EPT data model from a two-dimensional data range, creating one table per qualifying row.
        /// </summary>
        /// <param name="dataRange">Used range of input data (2D cell matrix)</param>
        /// <param name="parameter">Delimited parameter string; the first segment is the name section, remaining segments become field names</param>
        /// <param name="delimiter">Delimiter used to split  parameter</param>
        /// <param name="parameterColumn">Zero-indexed integer</param>
        /// <param name="valueColumn">Zero-indexed integer</param>
        /// <param name="prefix">Index offset flag: 1 shifts row and column indices by one — presumably for 1-based (COM/Excel-style) arrays; TODO confirm against callers</param>
        public void Build(object[,] dataRange, string parameter, char delimiter,
                          int parameterColumn, int valueColumn, int prefix = 0)
        {
            // Shaon
            // Reset the model: a fresh DataSet replaces any previous content
            Database = new DataSet();
            int indexP = parameterColumn - 1;
            int indexV = valueColumn - 1;

            string[] fields = parameter.Split(delimiter).Skip(1).ToArray(); // skip name section
            if (prefix == 1)
            {
                indexP++; // parameter column is shifted along with the row offset
            }
            // Loop bounds follow the same offset: rows prefix .. GetLength(0)+prefix-1
            for (int i = 0 + prefix; i < dataRange.GetLength(0) + prefix; i++)
            {
                // Null-conditional: rows whose parameter cell is null yield a null array and are skipped
                string[] param = dataRange[i, indexP]?.ToString().Split(delimiter);
                if (param?.Length >= 3)
                {
                    List <object> values = new List <object>(param.Length);
                    values.AddRange(param.Skip(1)); // skip name section

                    if (prefix == 1)
                    {
                        values.Add(dataRange[i, indexV + 1]); // value column shifted as well
                    }
                    else
                    {
                        values.Add(dataRange[i, indexV]);
                    }
                    // param[0] (the name section) becomes the table name
                    Database.AddTable(param[0], fields, values.ToArray());
                }
            }
        }
示例#3
0
        /// <summary>
        /// Gets a copy of the internal DataTable, carrying over schema, class metadata, title, description and rows.
        /// </summary>
        /// <param name="dataset">Optional data set to which the copy is added.</param>
        /// <param name="tableName">Title for the copy; falls back to this instance's name when empty.</param>
        /// <returns>The populated copy of the internal table.</returns>
        public DataTable GetDataTable(DataSet dataset = null, string tableName = "")
        {
            DataTable output = table.GetClonedShema <DataTable>();

            output.SetClassName(type.Name);
            output.SetClassType(type);

            if (tableName.isNullOrEmpty())
            {
                tableName = name;
            }

            output.SetTitle(tableName);
            output.SetDescription(description);

            output.SetCategoryPriority(settings.CategoryByPriority);
            output.SetAdditionalInfo(table.GetAdditionalInfo());

            output.SetAggregationAspect(dataPointAggregationAspect.none);

            output.CopyRowsFrom(table);

            if (dataset != null)
            {
                // Add the copy — not the internal table — so the caller's data set
                // receives the independent instance this method constructs and returns.
                dataset.AddTable(output);
            }
            return output;
        }
示例#4
0
        /// <summary>
        /// Builds a fresh term-statistics DataTable from the weight-table terms.
        /// </summary>
        /// <param name="tableName">Name of the table; falls back to this instance's name when empty.</param>
        /// <param name="ds">Optional data set that receives the finished table.</param>
        /// <param name="onlyTermAndFreq">if set to <c>true</c>, only the term name, inflections and absolute-frequency columns are produced.</param>
        /// <returns>The populated table.</returns>
        public DataTable GetDataTableClean(string tableName = "", DataSet ds = null, bool onlyTermAndFreq = false)
        {
            if (tableName.isNullOrEmpty())
            {
                tableName = name;
            }

            var resultTable = new DataTable();
            resultTable.SetTitle(tableName);

            // Core columns, always present
            resultTable.Add(termTableColumns.termName, "Nominal form of the term", "Tn", typeof(string), dataPointImportance.normal);
            resultTable.Add(termTableColumns.termInflections, "Inflected words or otherwise related terms in the same semantic cloud, as CSV", "Ti", typeof(string), dataPointImportance.normal);
            resultTable.Add(termTableColumns.freqAbs, "Absolute frequency - number of occurences", "T_af", typeof(int), dataPointImportance.normal, "Abs. freq.");

            bool fullStatistics = !onlyTermAndFreq;

            if (fullStatistics)
            {
                // Extended statistical measures
                resultTable.Add(termTableColumns.freqNorm, "Normalized frequency - abs. frequency divided by the maximum", "T_nf", typeof(double), dataPointImportance.important, "#0.00000");
                resultTable.Add(termTableColumns.df, "Document frequency - number of documents containing the term", "T_df", typeof(int), dataPointImportance.normal);
                resultTable.Add(termTableColumns.idf, "Inverse document frequency - logaritmicly normalized T_df", "T_idf", typeof(double), dataPointImportance.normal, "#0.00000");
                resultTable.Add(termTableColumns.tf_idf, "Term frequency Inverse document frequency - calculated as TF-IDF", "T_tf-idf", typeof(double), dataPointImportance.important, "#0.00000");
                resultTable.Add(termTableColumns.cw, "Cumulative weight of term", "T_cw", typeof(double), dataPointImportance.normal, "#0.00000");
                resultTable.Add(termTableColumns.ncw, "Normalized cumulative weight of term", "T_ncw", typeof(double), dataPointImportance.important, "#0.00000");
            }

            // One row per term
            foreach (IWeightTableTerm term in terms.Values)
            {
                DataRow row = resultTable.NewRow();

                row[nameof(termTableColumns.termName)] = term.name;

                List <string> allForms = term.GetAllForms(false);
                row[nameof(termTableColumns.termInflections)] = allForms.toCsvInLine();
                row[nameof(termTableColumns.freqAbs)] = GetAFreq(term.nominalForm);

                if (fullStatistics)
                {
                    row[nameof(termTableColumns.freqNorm)] = GetNFreq(term.nominalForm);
                    row[nameof(termTableColumns.df)] = GetBDFreq(term.nominalForm);
                    row[nameof(termTableColumns.idf)] = GetIDF(term.nominalForm);
                    row[nameof(termTableColumns.tf_idf)] = GetTF_IDF(term.nominalForm);
                    row[nameof(termTableColumns.cw)] = GetWeight(term.nominalForm);
                    row[nameof(termTableColumns.ncw)] = GetNWeight(term.nominalForm);
                }

                resultTable.Rows.Add(row);
            }

            ds?.AddTable(resultTable);

            return resultTable;
        }
示例#5
0
        /// <summary>Performs post-processing of data collected by the workload plugin</summary>
        /// <remarks><para>Loads all saved DataTables, groups rows in averages for each measure group and creates summary table with all experiments</para></remarks>
        /// <param name="searchPattern">pattern used to select input files</param>
        /// <param name="groupColumn">column name used for row grouping</param>
        /// <seealso cref="aceOperationSetExecutorBase"/>
        public void aceOperation_runWorkloadData(
            [Description("pattern used to select input files")] string searchPattern = "results*.xml",
            [Description("column name used for row grouping")] string groupColumn    = "measureGroup")
        // [Description("columns to include in overview table")] String overviewColumns = "DataLoad,CrawlerIterations,ContentPages,dlcMaximum")
        {
            // Narrow the working folder to the workload index and select the input files
            aceOperation_selectFiles(searchPattern, "index\\workload", true);

            folder = folder["index\\workload"];

            List <DataTable> tables = new List <DataTable>();

            // Rows within each measure group are averaged
            dataPointAggregationType aggType = dataPointAggregationType.avg;

            int ci = 1;                        // progress counter (1-based)
            int c  = selectedFiles.Count();    // total number of selected input files

            output.log("[" + c + "] DataTable in the cue.");


            List <DataTable> allTables = new List <DataTable>();
            DataSet          dSet      = new DataSet();


            // Results bucketed per crawler name: grouped data tables and their rendered report tables
            aceDictionarySet <string, DataTable> byCrawler = new aceDictionarySet <string, DataTable>();
            aceDictionarySet <string, DataTableForStatistics> byCrawlerRT = new aceDictionarySet <string, DataTableForStatistics>();

            DataTableForStatistics rt = null;

            foreach (FileInfo fi in selectedFiles)
            {
                try
                {
                    // Load the saved workload entries from this file, keyed by EntryID
                    objectTable <reportPlugIn_workloadEntry> workloadEntry = new objectTable <reportPlugIn_workloadEntry>(fi.FullName, true, "EntryID", "");

                    objectTable <reportPlugIn_workloadEntry> workloadGrouped = new objectTable <reportPlugIn_workloadEntry>("EntryID", "aggregated");

                    // Group entries by the group column; "terminationWarning = 0" presumably filters out aborted runs — verify against GetGroups semantics
                    aceDictionarySet <int, reportPlugIn_workloadEntry> workloadGroups = workloadEntry.GetGroups <int>(groupColumn, "terminationWarning = 0");

                    collectionAggregationResultSet <reportPlugIn_workloadEntry> aggregateSet = new collectionAggregationResultSet <reportPlugIn_workloadEntry>();



                    // Compute the avg aggregate for each measure group and collect it
                    foreach (var set in workloadGroups)
                    {
                        collectionAggregationResult <reportPlugIn_workloadEntry> aggregates = null;
                        aggregates = set.Value.GetAggregates(aggType);

                        var aggregate = aggregates[aggType];
                        aggregate.measureGroup = set.Key;
                        aggregate.EntryID      = set.Key.ToString("D5") + "_" + aggType.ToString();
                        workloadGrouped.AddOrUpdate(aggregate);
                        aggregateSet.Add(aggregate.EntryID + "_" + fi.Name, aggregates);
                    }

                    string filename = (fi.Name + "_" + groupColumn + "_" + aggType.ToString()).getFilename();

                    // Table name: extracted entry id plus current table count — presumably to keep names unique within dSet
                    string n = reportPlugIn_workload_state.ExtractEntryID(aggregateSet.lastItem.EntryID) + dSet.Tables.Count.ToString("D2");

                    DataTable dt = workloadGrouped.GetDataTable(dSet, n);
                    dt.SetDescription("Collection of [" + aggregateSet.recordType.Name + "] records grouped by [" + groupColumn + "]");
                    dt.SetAggregationAspect(dataPointAggregationAspect.subSetOfRows);
                    dt.SetAggregationOriginCount(aggregateSet.Count);
                    dt.SetAdditionalInfoEntry("Aggregation Type:", aggType);
                    dt.SetAdditionalInfoEntry("Data source file:", fi.Name);

                    dt.SetAdditionalInfoEntries("Last", aggregateSet.lastItem, typeof(string));

                    dt.SetTitle(n);

                    byCrawler.Add(aggregateSet.firstItem.Crawler, dt);

                    // dt.TableName = n;
                    //   dSet.AddTable(dt);


                    // Render and save the per-file report; remember it for the per-crawler export below
                    rt = dt.GetReportAndSave(folder, imbWEMManager.authorNotation, n.getFilename(), true);
                    byCrawlerRT.Add(aggregateSet.firstItem.Crawler, rt);
                    response.AppendLine("[" + ci + " / " + c + "] DataTable [" + fi.Name + "] had [" + workloadGroups.Keys.Count() + "] groups. Result saved as: " + filename);
                    ci++;
                } catch (Exception ex)
                {
                    // Best-effort: log the failure and continue with the next file
                    output.log("[" + ci + " / " + c + "] DataTable [" + fi.FullName + "] failed.");
                    output.log(ex.Message);
                }
            }



            output.log("[" + c + "] DataTable processed.");

            // Export the combined data set holding every grouped table
            dSet.serializeDataSet("workload_all", folder, dataTableExportEnum.excel, imbWEMManager.authorNotation);

            // One Excel export per crawler: copies of its tables plus a rendered legend
            foreach (string key in byCrawler.Keys)
            {
                string  filename = key.getFilename();
                DataSet sd       = new DataSet(key);
                foreach (DataTable dti in byCrawler[key])
                {
                    sd.AddTable(dti.Copy());
                }

                sd.AddTable(byCrawlerRT[key].First().RenderLegend());
                sd.serializeDataSet(filename, folder, dataTableExportEnum.excel, imbWEMManager.authorNotation);
            }
        }