Example #1
        /// <summary>
        /// Queries the table for the specified terms and returns the aggregated score. The score source is selected by <paramref name="scoreToUse"/> (only numeric columns are supported).
        /// </summary>
        /// <param name="queryTerms">Terms to test against the table; only the terms found are used in the calculation.</param>
        /// <param name="scoreToUse">Which numeric property of a matched term to use for aggregation.</param>
        /// <param name="aggregation">The aggregation type.</param>
        /// <returns>Aggregated score of the matched terms; any score information carried by the query terms themselves is ignored.</returns>
        public static double GetScoreForMatch(this IWeightTable table, IEnumerable <string> queryTerms, termTableColumns scoreToUse = termTableColumns.tf_idf, dataPointAggregationType aggregation = dataPointAggregationType.sum)
        {
            List <IWeightTableTerm> output = table.GetMatches(queryTerms);
            return(output.GetScoreAggregate(table, scoreToUse, aggregation));
        }
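A minimal usage sketch for the extension above, assuming the imb* framework namespaces are imported; the helper name and the choice of TF-IDF with averaging are illustrative only:

        // illustrative sketch (not part of the original source)
        public static double GetAverageTfIdf(IWeightTable weightTable, IEnumerable <string> query)
        {
            // only query terms actually present in the table contribute to the aggregate
            return weightTable.GetScoreForMatch(query, termTableColumns.tf_idf, dataPointAggregationType.avg);
        }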
        /// <summary>
        /// Computes aggregated instances of <typeparamref name="T"/> over the collection, one per requested aggregation type.
        /// </summary>
        /// <typeparam name="T">Record type whose public, writable numeric properties are aggregated.</typeparam>
        /// <param name="source">The source collection.</param>
        /// <param name="type">Aggregation types to compute (flags).</param>
        /// <param name="stringKeepLastEntry">if set to <c>true</c>, string properties are copied from the last entry.</param>
        /// <returns>Result containing one aggregated <typeparamref name="T"/> instance per computed aggregation type.</returns>
        public static collectionAggregationResult <T> GetAggregates <T>(this IEnumerable <T> source, dataPointAggregationType type = dataPointAggregationType.avg, bool stringKeepLastEntry = true) where T : class, new()
        {

            var aggList = type.getEnumListFromFlags <dataPointAggregationType>();

            collectionAggregationResult <T> output = new collectionAggregationResult <T>();

            output.aspect = dataPointAggregationAspect.subSetOfRows;

            aceDictionary2D <dataPointAggregationType, PropertyInfo, double> outputData = new aceDictionary2D <dataPointAggregationType, PropertyInfo, double>();

            aceDictionary2D <dataPointAggregationType, PropertyInfo, List <double> > dataCollections = new aceDictionary2D <dataPointAggregationType, PropertyInfo, List <double> >();

            Type t = typeof(T);

            List <PropertyInfo> nominalList = new List <PropertyInfo>();
            List <PropertyInfo> piList      = new List <PropertyInfo>();

            Dictionary <PropertyInfo, settingsPropertyEntry> sPEDict = new Dictionary <PropertyInfo, settingsPropertyEntry>();

            foreach (PropertyInfo pi in t.GetProperties(BindingFlags.Public | BindingFlags.Instance | BindingFlags.SetProperty | BindingFlags.GetProperty))
            {
                settingsPropertyEntry sPE = new settingsPropertyEntry(pi);

                bool ok = true;

                if (!pi.CanWrite)
                {
                    ok = false;
                }

                if (ok && pi.PropertyType == typeof(string))
                {
                    nominalList.Add(pi);
                    ok = false;
                }
                else if (ok && pi.PropertyType == typeof(Enum))
                {
                    ok = false;
                }

                if (ok && sPE.aggregation[dataPointAggregationAspect.subSetOfRows].HasFlag(dataPointAggregationType.hidden))
                {
                    ok = false;
                }
                if (ok && sPE.attributes.ContainsKey(imbAttributeName.reporting_hide))
                {
                    ok = false;
                }

                if (ok)
                {
                    sPEDict.Add(pi, sPE);
                    piList.Add(pi);
                }
            }

            if (aggList.Contains(dataPointAggregationType.avg))
            {
                aggList.AddUnique(dataPointAggregationType.sum);
            }

            if (aggList.Contains(dataPointAggregationType.range))
            {
                aggList.AddUnique(dataPointAggregationType.min);
                aggList.AddUnique(dataPointAggregationType.max);
            }

            foreach (dataPointAggregationType aggType in aggList)
            {
                output.Add(aggType, new T());

                switch (aggType)
                {
                case dataPointAggregationType.var:
                case dataPointAggregationType.stdev:
                case dataPointAggregationType.entropy:
                    // these aggregations need the complete value series, collected during the first pass
                    foreach (PropertyInfo pi in piList)
                    {
                        dataCollections[aggType, pi] = new List <double>();
                    }
                    break;

                case dataPointAggregationType.min:
                case dataPointAggregationType.max:
                    // seed min/max so that the first data point always replaces the sentinel value
                    foreach (PropertyInfo pi in piList)
                    {
                        outputData[aggType, pi] = (aggType == dataPointAggregationType.min) ? double.MaxValue : double.MinValue;
                    }
                    break;
                }
            }

            int count = 0;

            // <------------ first pass: accumulate sum/min/max and collect value series for var, stdev and entropy
            foreach (T item in source)
            {
                if (output.firstItem == null)
                {
                    output.firstItem = item;
                }
                output.lastItem = item;
                foreach (dataPointAggregationType aggType in aggList)
                {
                    foreach (PropertyInfo pi in piList)
                    {
                        double vl = outputData[aggType, pi];

                        switch (aggType)
                        {
                        case dataPointAggregationType.sum:
                            vl = vl + item.imbGetPropertySafe <double>(pi);
                            break;

                        case dataPointAggregationType.min:
                            vl = Math.Min(item.imbGetPropertySafe <double>(pi), vl);
                            break;

                        case dataPointAggregationType.max:
                            vl = Math.Max(item.imbGetPropertySafe <double>(pi), vl);
                            break;

                        case dataPointAggregationType.var:
                        case dataPointAggregationType.stdev:
                        case dataPointAggregationType.entropy:
                            dataCollections[aggType, pi].Add(item.imbGetPropertySafe <double>(pi));
                            break;
                        }
                        outputData[aggType, pi] = vl;
                    }
                }

                count++;
            }

            // <------------ second pass: derive count, avg, range, first/last entry and the series-based aggregates
            foreach (dataPointAggregationType aggType in aggList)
            {
                foreach (PropertyInfo pi in piList)
                {
                    switch (aggType)
                    {
                    case dataPointAggregationType.count:
                        outputData[aggType, pi] = count;
                        break;

                    case dataPointAggregationType.avg:
                        outputData[aggType, pi] = outputData[dataPointAggregationType.sum, pi] / (double)count;
                        break;

                    case dataPointAggregationType.range:
                        outputData[aggType, pi] = outputData[dataPointAggregationType.max, pi] - outputData[dataPointAggregationType.min, pi];
                        break;

                    case dataPointAggregationType.firstEntry:
                        outputData[aggType, pi] = output.firstItem.imbGetPropertySafe <double>(pi);
                        break;

                    case dataPointAggregationType.lastEntry:
                        outputData[aggType, pi] = output.lastItem.imbGetPropertySafe <double>(pi);
                        break;

                    case dataPointAggregationType.var:
                        outputData[aggType, pi] = dataCollections[aggType, pi].GetVariance();
                        break;

                    case dataPointAggregationType.stdev:
                        outputData[aggType, pi] = dataCollections[aggType, pi].GetStdDeviation();
                        break;

                    case dataPointAggregationType.entropy:
                        outputData[aggType, pi] = dataCollections[aggType, pi].GetEntropy();
                        break;
                    }
                }
            }

            foreach (dataPointAggregationType aggType in aggList)
            {
                foreach (PropertyInfo pi in piList)
                {
                    output[aggType].imbSetPropertyConvertSafe(pi, outputData[aggType, pi]);
                }

                if (stringKeepLastEntry)
                {
                    foreach (PropertyInfo pi in nominalList)
                    {
                        output[aggType].imbSetPropertyConvertSafe(pi, output.lastItem.imbGetPropertySafe(pi));
                    }
                }
            }
            output.Count = count;
            return(output);
        }
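A minimal sketch of how the method above might be called; the wrapper name is an assumption, and the flag combination relies on the expansion rules shown in the code (avg implies sum, range implies min and max):

        // illustrative sketch (not part of the original source)
        public static collectionAggregationResult <T> GetAvgAndRange <T>(IEnumerable <T> records) where T : class, new()
        {
            // avg implies sum and range implies min/max, so five aggregated instances are produced
            return records.GetAggregates(dataPointAggregationType.avg | dataPointAggregationType.range);
        }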
Example #3
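        /// <summary>
        /// Aggregates the selected score column of the table over the given terms.
        /// </summary>
        /// <param name="terms">Terms whose scores are read from the table.</param>
        /// <param name="table">The weight table used to resolve the scores.</param>
        /// <param name="scoreToUse">Which numeric column to aggregate.</param>
        /// <param name="aggregation">The aggregation type.</param>
        /// <returns>The aggregated score.</returns>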
        public static double GetScoreAggregate(this IEnumerable <IWeightTableTerm> terms, IWeightTable table, termTableColumns scoreToUse = termTableColumns.tf_idf, dataPointAggregationType aggregation = dataPointAggregationType.sum)
        {
            List <double> output = new List <double>();

            foreach (IWeightTableTerm term in terms)
            {
                switch (scoreToUse)
                {
                case termTableColumns.cw:
                    output.Add(table.GetWeight(term));
                    break;

                case termTableColumns.df:
                    output.Add(table.GetBDFreq(term));
                    break;

                case termTableColumns.freqAbs:
                    output.Add(table.GetAFreq(term));
                    break;

                case termTableColumns.freqNorm:
                    output.Add(table.GetNFreq(term));
                    break;

                case termTableColumns.idf:
                    output.Add(table.GetIDF(term));
                    break;

                case termTableColumns.ncw:
                    output.Add(table.GetNWeight(term));
                    break;

                case termTableColumns.none:
                    break;

                case termTableColumns.words:
                case termTableColumns.normalizedSemanticDistance:
                case termTableColumns.semanticDistance:
                case termTableColumns.termLemma:
                case termTableColumns.termName:
                    throw new NotImplementedException();

                case termTableColumns.tf_idf:
                    output.Add(table.GetTF_IDF(term));
                    break;
                }
            }

            switch (aggregation)
            {
            case dataPointAggregationType.avg:
                return(output.Average());

            case dataPointAggregationType.count:
                return(output.Count());

            case dataPointAggregationType.max:
                return(output.Max());

            case dataPointAggregationType.min:
                return(output.Min());

            case dataPointAggregationType.range:
                return(output.Max() - output.Min());

            case dataPointAggregationType.sum:
                return(output.Sum());

            default:
                throw new dataException("Operation not supported [" + aggregation.toString() + "]", null, table, "Aggregation operation not supported");
            }
        }
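A minimal sketch combining GetMatches (as used in Example #1) with the aggregation above; the helper name and the choice of the IDF column are illustrative assumptions:

        // illustrative sketch (not part of the original source)
        public static double GetMaxIdfForMatches(IWeightTable table, IEnumerable <string> queryTerms)
        {
            List <IWeightTableTerm> matches = table.GetMatches(queryTerms);

            // takes the highest IDF among the matched terms
            return matches.GetScoreAggregate(table, termTableColumns.idf, dataPointAggregationType.max);
        }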
 public DataRowMetaDefinition(DataRowInReportTypeEnum __rowType, dataPointAggregationType __aggregation)
 {
     rowType     = __rowType;
     aggregation = __aggregation;
 }
        /// <summary>
        /// Adds aggregate ("range") rows to the table.
        /// </summary>
        /// <param name="namePrefix">Row name prefix.</param>
        /// <param name="targetTable">The target table.</param>
        /// <param name="placeDataRowMarks">if set to <c>true</c>, styling conditioners are attached so that rows holding minimum and maximum values can be highlighted.</param>
        /// <param name="rowsToAdd">Aggregate rows to append (flags).</param>
        public void AddRangeRows(String namePrefix, DataTable targetTable, Boolean placeDataRowMarks, dataPointAggregationType rowsToAdd = dataPointAggregationType.sum | dataPointAggregationType.avg | dataPointAggregationType.count | dataPointAggregationType.min | dataPointAggregationType.max | dataPointAggregationType.range)
        {
            Int32 i = 0;

            foreach (DataRow dr in targetTable.Rows)
            {
                // rows before rowsToSkipFromLearning are excluded from range learning
                if (i >= rowsToSkipFromLearning)
                {
                    foreach (var pair in finders)
                    {
                        Object vl = dr[pair.Value.id];

                        if (vl is Int32)
                        {
                            pair.Value.Learn(Convert.ToDouble((Int32)vl));
                        }
                        else if (vl is Double)
                        {
                            pair.Value.Learn((Double)vl);
                        }
                        else if (vl is Boolean)
                        {
                            pair.Value.Learn(Convert.ToDouble((Boolean)vl));
                        }
                    }
                }
                i++;
            }

            List <String> rownamesHMax = new List <string>();
            List <String> rownamesHMin = new List <string>();
            List <String> rownamesH3   = new List <string>();



            foreach (dataPointAggregationType dt in rowsToAdd.getEnumListFromFlags())
            {
                DataRow dr   = targetTable.NewRow();
                String  name = namePrefix + " " + dt.ToString();

                if (keyColumn != null)
                {
                    dr[keyColumn.ColumnName] = name;
                }

                if (columnsToSignIn.Any())
                {
                    foreach (String cn in columnsToSignIn)
                    {
                        dr[cn] = name;
                    }
                }

                if (placeDataRowMarks && keyColumn != null)
                {
                    foreach (DataRow dd in targetTable.Rows)
                    {
                        foreach (var pair in finders)
                        {
                            rangeFinderWithData rf = pair.Value;


                            Double vl = dd[rf.id].imbConvertValueSafeTyped <Double>();
                            switch (dt)
                            {
                            case dataPointAggregationType.max:
                                if (vl == rf.Maximum)
                                {
                                    rownamesHMax.Add(dd[keyColumnName].toStringSafe());
                                }
                                break;

                            case dataPointAggregationType.min:
                                if (vl == rf.Minimum)
                                {
                                    rownamesHMin.Add(dd[keyColumnName].toStringSafe());
                                }

                                break;
                            }
                        }
                    }
                }



                foreach (var pair in finders)
                {
                    rangeFinderWithData rf = pair.Value;

                    try
                    {
                        switch (dt)
                        {
                        case dataPointAggregationType.avg:
                            dr[rf.id] = rf.Average;
                            rownamesH3.Add(name);
                            targetTable.SetAdditionalInfoEntry("Prefix: " + dt.ToString(), "Arithmetic mean");
                            break;

                        case dataPointAggregationType.count:
                            dr[rf.id] = rf.Count;
                            targetTable.SetAdditionalInfoEntry("Prefix: " + dt.ToString(), "Number of rows");
                            break;

                        case dataPointAggregationType.max:
                            rownamesHMax.Add(name);
                            if (rf.Maximum > Double.MinValue)
                            {
                                dr[rf.id] = rf.Maximum;
                            }
                            targetTable.SetAdditionalInfoEntry("Prefix: " + dt.ToString(), "Highest value");
                            break;

                        case dataPointAggregationType.min:
                            rownamesHMin.Add(name);
                            if (rf.Minimum < Double.MaxValue)
                            {
                                dr[rf.id] = rf.Minimum;
                            }
                            targetTable.SetAdditionalInfoEntry("Prefix: " + dt.ToString(), "Smallest value");
                            break;

                        case dataPointAggregationType.range:
                            dr[rf.id] = rf.Range;
                            targetTable.SetAdditionalInfoEntry("Prefix: " + dt.ToString(), "Range of values");
                            break;

                        case dataPointAggregationType.sum:
                            dr[rf.id] = rf.Sum;
                            rownamesH3.Add(name);
                            targetTable.SetAdditionalInfoEntry("Prefix: " + dt.ToString(), "Sum");
                            break;

                        case dataPointAggregationType.entropy:
                            dr[rf.id] = rf.doubleEntries.GetEntropy(1E-06, true);
                            targetTable.SetAdditionalInfoEntry("Prefix: " + dt.ToString(), "Normalized Entropy");
                            rownamesH3.Add(name);
                            break;

                        case dataPointAggregationType.stdev:

                            dr[rf.id] = rf.doubleEntries.GetStdDeviation();
                            rownamesH3.Add(name);
                            targetTable.SetAdditionalInfoEntry("Prefix: " + dt.ToString(), "Standard Deviation");
                            break;

                        case dataPointAggregationType.var:
                            dr[rf.id] = rf.doubleEntries.GetVariance();
                            rownamesH3.Add(name);
                            targetTable.SetAdditionalInfoEntry("Prefix: " + dt.ToString(), "Variance");
                            break;
                        }
                    }
                    catch (Exception ex)
                    {
                        targetTable.AddExtra("rangeFinder[" + rf.id + "] failed on [" + dt.ToString() + "] with exception: " + ex.Message);
                        targetTable.AddExtra("::: " + ex.StackTrace);
                    }
                }



                targetTable.Rows.Add(dr);
            }


            targetTable.GetRowMetaSet().SetStyleForRowsWithValue <String>(DataRowInReportTypeEnum.dataHighlightA, keyColumnName, rownamesHMin);
            targetTable.GetRowMetaSet().SetStyleForRowsWithValue <String>(DataRowInReportTypeEnum.dataHighlightB, keyColumnName, rownamesHMax);
            targetTable.GetRowMetaSet().SetStyleForRowsWithValue <String>(DataRowInReportTypeEnum.dataHighlightC, keyColumnName, rownamesH3);
        }
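A sketch of a call site for the method above, assuming System.Data and the imb* namespaces are imported; the declaring type is not visible in this excerpt, so the instance is passed as dynamic, and the prefix and flag set are assumptions:

        // illustrative sketch (not part of the original source)
        public static void AppendSummaryRows(dynamic rowMetaSet, DataTable table)
        {
            // appends sum, avg, min and max rows; placeDataRowMarks = true lets rows holding min/max values receive highlight styles
            rowMetaSet.AddRangeRows("Summary", table, true,
                                    dataPointAggregationType.sum | dataPointAggregationType.avg | dataPointAggregationType.min | dataPointAggregationType.max);
        }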
Example #6
        /// <summary>Performs post-processing of data collected by the workload plugin</summary>
        /// <remarks><para>Loads all saved DataTables, groups rows in averages for each measure group and creates summary table with all experiments</para></remarks>
        /// <param name="searchPattern">pattern used to select input files</param>
        /// <param name="groupColumn">column name used for row grouping</param>
        /// <seealso cref="aceOperationSetExecutorBase"/>
        public void aceOperation_runWorkloadData(
            [Description("pattern used to select input files")] string searchPattern = "results*.xml",
            [Description("column name used for row grouping")] string groupColumn    = "measureGroup")
        {
            aceOperation_selectFiles(searchPattern, "index\\workload", true);

            folder = folder["index\\workload"];

            List <DataTable> tables = new List <DataTable>();

            dataPointAggregationType aggType = dataPointAggregationType.avg;

            int ci = 1;
            int c  = selectedFiles.Count();

            output.log("[" + c + "] DataTables in the queue.");


            List <DataTable> allTables = new List <DataTable>();
            DataSet          dSet      = new DataSet();


            aceDictionarySet <string, DataTable> byCrawler = new aceDictionarySet <string, DataTable>();
            aceDictionarySet <string, DataTableForStatistics> byCrawlerRT = new aceDictionarySet <string, DataTableForStatistics>();

            DataTableForStatistics rt = null;

            foreach (FileInfo fi in selectedFiles)
            {
                try
                {
                    objectTable <reportPlugIn_workloadEntry> workloadEntry = new objectTable <reportPlugIn_workloadEntry>(fi.FullName, true, "EntryID", "");

                    objectTable <reportPlugIn_workloadEntry> workloadGrouped = new objectTable <reportPlugIn_workloadEntry>("EntryID", "aggregated");

                    aceDictionarySet <int, reportPlugIn_workloadEntry> workloadGroups = workloadEntry.GetGroups <int>(groupColumn, "terminationWarning = 0");

                    collectionAggregationResultSet <reportPlugIn_workloadEntry> aggregateSet = new collectionAggregationResultSet <reportPlugIn_workloadEntry>();



                    foreach (var set in workloadGroups)
                    {
                        collectionAggregationResult <reportPlugIn_workloadEntry> aggregates = null;
                        aggregates = set.Value.GetAggregates(aggType);

                        var aggregate = aggregates[aggType];
                        aggregate.measureGroup = set.Key;
                        aggregate.EntryID      = set.Key.ToString("D5") + "_" + aggType.ToString();
                        workloadGrouped.AddOrUpdate(aggregate);
                        aggregateSet.Add(aggregate.EntryID + "_" + fi.Name, aggregates);
                    }

                    string filename = (fi.Name + "_" + groupColumn + "_" + aggType.ToString()).getFilename();

                    string n = reportPlugIn_workload_state.ExtractEntryID(aggregateSet.lastItem.EntryID) + dSet.Tables.Count.ToString("D2");

                    DataTable dt = workloadGrouped.GetDataTable(dSet, n);
                    dt.SetDescription("Collection of [" + aggregateSet.recordType.Name + "] records grouped by [" + groupColumn + "]");
                    dt.SetAggregationAspect(dataPointAggregationAspect.subSetOfRows);
                    dt.SetAggregationOriginCount(aggregateSet.Count);
                    dt.SetAdditionalInfoEntry("Aggregation Type:", aggType);
                    dt.SetAdditionalInfoEntry("Data source file:", fi.Name);

                    dt.SetAdditionalInfoEntries("Last", aggregateSet.lastItem, typeof(string));

                    dt.SetTitle(n);

                    byCrawler.Add(aggregateSet.firstItem.Crawler, dt);



                    rt = dt.GetReportAndSave(folder, imbWEMManager.authorNotation, n.getFilename(), true);
                    byCrawlerRT.Add(aggregateSet.firstItem.Crawler, rt);
                    response.AppendLine("[" + ci + " / " + c + "] DataTable [" + fi.Name + "] had [" + workloadGroups.Keys.Count() + "] groups. Result saved as: " + filename);
                    ci++;
                } catch (Exception ex)
                {
                    output.log("[" + ci + " / " + c + "] DataTable [" + fi.FullName + "] failed.");
                    output.log(ex.Message);
                }
            }



            output.log("[" + c + "] DataTables processed.");

            dSet.serializeDataSet("workload_all", folder, dataTableExportEnum.excel, imbWEMManager.authorNotation);

            foreach (string key in byCrawler.Keys)
            {
                string  filename = key.getFilename();
                DataSet sd       = new DataSet(key);
                foreach (DataTable dti in byCrawler[key])
                {
                    sd.AddTable(dti.Copy());
                }

                sd.AddTable(byCrawlerRT[key].First().RenderLegend());
                sd.serializeDataSet(filename, folder, dataTableExportEnum.excel, imbWEMManager.authorNotation);
            }
        }
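A sketch of invoking the operation above directly; the concrete plugin type is not shown in this excerpt, so the instance is passed as dynamic:

        // illustrative sketch (not part of the original source)
        public static void RunWorkloadPostProcessing(dynamic workloadPlugin)
        {
            // processes every results*.xml under index\workload, grouping rows by the measureGroup column
            workloadPlugin.aceOperation_runWorkloadData("results*.xml", "measureGroup");
        }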
Example #7
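        /// <summary>
        /// Builds a <see cref="DataColumnInReportDefinition"/> for the given source column and aggregation, and registers it under the column name.
        /// </summary>
        /// <param name="columnType">Role of the column in the report.</param>
        /// <param name="column">The source data column.</param>
        /// <param name="aggregation">The aggregation applied to the column.</param>
        /// <param name="unit">Optional unit label.</param>
        /// <returns>The created column definition.</returns>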
        public DataColumnInReportDefinition Add(DataColumnInReportTypeEnum columnType, DataColumn column, dataPointAggregationType aggregation, string unit = "")
        {
            DataColumnInReportDefinition output = new DataColumnInReportDefinition();

            output.columnType       = columnType;
            output.aggregation      = aggregation;
            output.columnSourceName = column.ColumnName;
            output.columnPriority   = column.GetPriority();
            output.format           = column.GetFormat();

            Type   valueType = typeof(string);
            string letter    = "";

            switch (aggregation)
            {
            default:
            case dataPointAggregationType.max:
            case dataPointAggregationType.min:
            case dataPointAggregationType.sum:
            case dataPointAggregationType.firstEntry:
            case dataPointAggregationType.lastEntry:
            case dataPointAggregationType.range:
                valueType = column.DataType;
                break;

            case dataPointAggregationType.avg:
            case dataPointAggregationType.stdev:
            case dataPointAggregationType.var:
            case dataPointAggregationType.entropy:
                valueType = typeof(double);
                if (output.format.isNullOrEmpty())
                {
                    output.format = "F5";
                }
                break;

            case dataPointAggregationType.count:
                valueType = typeof(int);
                break;
            }
            letter = column.GetLetter();

            if (columnType == DataColumnInReportTypeEnum.dataSummed)
            {
                if (!letter.isNullOrEmpty())
                {
                    letter = aggregation.ToString() + "(" + letter + ")";
                }
                output.columnLetter = letter;

                output.columnDescription = "(" + aggregation.ToString() + ") of " + column.ColumnName + ". " + column.GetDesc();
            }
            output.columnName       = column.ColumnName + " (" + aggregation.ToString() + ")";
            output.columnSourceName = column.ColumnName;

            output.importance      = column.GetImportance();
            output.columnUnit      = unit.isNullOrEmpty() ? column.GetUnit() : unit;   // honor an explicitly supplied unit
            output.columnValueType = valueType;
            output.columnDefault   = valueType.GetDefaultValue();
            output.columnGroup     = column.GetGroup();

            output.spe = column.GetSPE();
            Add(column.ColumnName, output);

            return(output);
        }
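A sketch of registering an averaged column through the method above, assuming System.Data and the imb* namespaces are imported; the collection instance is passed as dynamic because its declaring type is not shown, and the column name and unit are hypothetical:

        // illustrative sketch (not part of the original source)
        public static DataColumnInReportDefinition AddAverageColumn(dynamic columnDefinitions, DataTable sourceTable)
        {
            DataColumn durationColumn = sourceTable.Columns["duration"];   // hypothetical column name

            // averaged columns get a double value type and, if none is set, a default "F5" numeric format
            return columnDefinitions.Add(DataColumnInReportTypeEnum.dataSummed, durationColumn, dataPointAggregationType.avg, "ms");
        }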