Exemplo n.º 1
0
        /// <inheritdoc cref="StandardScanShareableAnalyzer{S}.FromAggregationResult"/>
        /// <remarks>
        /// Produces an empty metric when <paramref name="result"/> is null, when it holds no
        /// value at <paramref name="offset"/>, or when the value at that position is null.
        /// Otherwise the value at <paramref name="offset"/> is read as a double and handed
        /// to <c>ToSuccessMetric</c>.
        /// </remarks>
        /// <param name="result">Aggregation result row; may be null when no row was produced.</param>
        /// <param name="offset">Index of the value to read from <paramref name="result"/>.</param>
        /// <returns>The metric built from the value at <paramref name="offset"/>, or an empty metric.</returns>
        public virtual DoubleMetric FromAggregationResult(Row result, int offset)
        {
            // A missing row is treated like a missing value rather than failing with a
            // NullReferenceException when the row is dereferenced below.
            if (result is null || result.Values.Length <= offset || result[offset] == null)
            {
                return(AnalyzersExt.MetricFromEmpty(this, Name, string.Join(',', Columns),
                                                    AnalyzersExt.EntityFrom(Columns)));
            }

            return(ToSuccessMetric(result.GetAs <double>(offset)));
        }
Exemplo n.º 2
0
        /// <inheritdoc cref="Analyzer{S,M}.ComputeMetricFrom"/>
        /// <remarks>
        /// Yields an empty metric when <paramref name="state"/> carries no value; otherwise the
        /// state's metric value is wrapped in a <c>Try</c> and converted into a metric.
        /// </remarks>
        public override DoubleMetric ComputeMetricFrom(Option <S> state)
        {
            // Guard clause: nothing to compute without a state.
            if (!state.HasValue)
            {
                return AnalyzersExt.MetricFromEmpty(this, Name, Instance, _metricEntity);
            }

            Try <double> metricValue = new Try <double>(state.Value.GetMetricValue());

            return AnalyzersExt.MetricFromValue(metricValue, Name, Instance, _metricEntity);
        }
    }
Exemplo n.º 3
0
        /// <inheritdoc cref="Analyzer{S,M}.ComputeMetricFrom"/>
        /// <remarks>
        /// Computes the mutual information of the first two entries of <c>Columns</c> from the
        /// joint frequency table in <paramref name="state"/>. With every count divided by the
        /// total number of rows to obtain p(x), p(y) and p(x, y), the result is the sum over all
        /// joint rows of p(x, y) * ln(p(x, y) / (p(x) * p(y))).
        /// An empty metric is returned when <paramref name="state"/> has no value or when the
        /// aggregated sum is null.
        /// </remarks>
        public override DoubleMetric ComputeMetricFrom(Option <FrequenciesAndNumRows> state)
        {
            string instance = string.Join(',', Columns);

            if (!state.HasValue)
            {
                return(AnalyzersExt.MetricFromEmpty(this, "MutualInformation", instance,
                                                    MetricEntity.Multicolumn));
            }

            long   total = state.Value.NumRows;
            string col1  = Columns.First();
            string col2  = Columns.Skip(1).First();

            // Names of the temporary columns that hold the marginal counts.
            string freqCol1 = $"__deequ_f1_{col1}";
            string freqCol2 = $"__deequ_f2_{col2}";

            DataFrame jointStats = state.Value.Frequencies;

            // Marginal counts per distinct value of each column, derived from the joint counts.
            DataFrame marginalStats1 = jointStats
                                       .Select(col1, AnalyzersExt.COUNT_COL)
                                       .GroupBy(col1)
                                       .Agg(Sum(AnalyzersExt.COUNT_COL).As(freqCol1));

            DataFrame marginalStats2 = jointStats
                                       .Select(col2, AnalyzersExt.COUNT_COL)
                                       .GroupBy(col2)
                                       .Agg(Sum(AnalyzersExt.COUNT_COL).As(freqCol2));

            // Per-row contribution; the arguments are raw counts that are normalised by the
            // total row count inside the expression.
            Func <Column, Column, Column, Column> miUdf = Udf((double countX, double countY, double countXY) =>
                                                              countXY / total * Math.Log(countXY / total / (countX / total * (countY / total))));

            // C# interpolation uses {name}; the former "${col1}_$col2" produced a stray '$' and
            // the literal text "$col2" in the temporary column name.
            string miCol = $"__deequ_mi_{col1}_{col2}";

            DataFrame value = jointStats
                              .Join(marginalStats1, col1)
                              .Join(marginalStats2, col2)
                              .WithColumn(miCol,
                                          miUdf(Col(freqCol1).Cast("double"), Col(freqCol2).Cast("double"),
                                                Col(AnalyzersExt.COUNT_COL).Cast("double")))
                              .Agg(Sum(miCol));

            Row resultRow = value.First();

            // The sum is null when there were no joint rows to aggregate.
            if (resultRow[0] == null)
            {
                return(AnalyzersExt.MetricFromEmpty(this, "MutualInformation", instance,
                                                    MetricEntity.Multicolumn));
            }

            return(AnalyzersExt.MetricFromValue(resultRow.GetAs <double>(0), "MutualInformation",
                                                instance,
                                                MetricEntity.Multicolumn));
        }
Exemplo n.º 4
0
        /// <inheritdoc cref="ScanShareableAnalyzer{S,M}.ComputeMetricFrom"/>
        /// <remarks>
        /// Returns an empty metric when <paramref name="state"/> has no value. Otherwise the
        /// frequency table is aggregated with the columns produced by
        /// <c>AggregationFunctions</c> for the state's row count, and the first collected row
        /// is passed to <c>FromAggregationResult</c> at offset 0.
        /// </remarks>
        public override DoubleMetric ComputeMetricFrom(Option <FrequenciesAndNumRows> state)
        {
            if (!state.HasValue)
            {
                return(AnalyzersExt.MetricFromEmpty(this, Name, string.Join(',', Columns),
                                                    AnalyzersExt.EntityFrom(Columns)));
            }

            // Materialise once: the sequence is consumed twice below (head and tail), and a
            // deferred enumerable would otherwise be evaluated for each pass.
            Column[] aggregations = AggregationFunctions(state.Value.NumRows).ToArray();

            Row result = state.Value.Frequencies
                         .Agg(aggregations.First(),
                              aggregations.Skip(1).ToArray())
                         .Collect()
                         .FirstOrDefault();

            return(FromAggregationResult(result, 0));
        }