Ejemplo n.º 1
0
        /// <inheritdoc cref="State{T}.Sum"/>
        /// <remarks>
        /// Outer-joins this instance's frequency table with the one of <paramref name="other"/> on every
        /// schema field except <c>AnalyzersExt.COUNT_COL</c>. Each of those fields is taken from whichever
        /// side provides it (coalesce), the two count columns are added after being passed through
        /// <c>AnalyzersExt.ZeroIfNull</c>, and the row totals of both states are added.
        /// </remarks>
        /// <param name="other">The state whose frequencies and row count are merged into this one.</param>
        /// <returns>A new state holding the merged frequency table and the combined number of rows.</returns>
        public override FrequenciesAndNumRows Sum(FrequenciesAndNumRows other)
        {
            // Materialized once: the list is consumed by the projection, the join seed and the fold.
            string[] columns = Frequencies.Schema().Fields
                .Select(field => field.Name)
                .Where(name => name != AnalyzersExt.COUNT_COL)
                .ToArray();

            Column[] projectionAfterMerge = columns
                .Select(col => Coalesce(Col($"this.{col}"), Col($"other.{col}")).As(col))
                .Append(
                    (AnalyzersExt.ZeroIfNull($"this.{AnalyzersExt.COUNT_COL}") +
                     AnalyzersExt.ZeroIfNull($"other.{AnalyzersExt.COUNT_COL}")).As(AnalyzersExt.COUNT_COL))
                .ToArray();

            // The first column seeds the condition, so only the remaining columns are folded in;
            // folding over the full list would add the first column's predicate a second time.
            Column joinCondition = columns
                .Skip(1)
                .Aggregate(NullSafeEq(columns.First()),
                           (condition, column) => condition.And(NullSafeEq(column)));

            DataFrame frequenciesSum = Frequencies
                .Alias("this")
                .Join(other.Frequencies.Alias("other"), joinCondition, "outer")
                .Select(projectionAfterMerge);

            return new FrequenciesAndNumRows(frequenciesSum, NumRows + other.NumRows);
        }
Ejemplo n.º 2
0
 /// <summary>
 /// Builds a precondition that verifies a schema contains the given column.
 /// </summary>
 /// <param name="column">The name of the column that has to be present.</param>
 /// <returns>
 /// A callback that throws an <see cref="Exception"/> when the schema it receives lacks the column
 /// and does nothing otherwise.
 /// </returns>
 private static Action<StructType> HasColumn(string column)
 {
     return schema =>
     {
         if (AnalyzersExt.HasColumn(schema, column))
         {
             return;
         }

         throw new Exception("Input data does not include column!");
     };
 }
Ejemplo n.º 3
0
 /// <inheritdoc cref="StandardScanShareableAnalyzer{S}.AdditionalPreconditions" />
 /// <remarks>
 /// Produces four checks, in this order: presence and numeric check for <c>ColumnA</c>, followed by
 /// presence and numeric check for <c>ColumnB</c>.
 /// </remarks>
 public override IEnumerable<Action<StructType>> AdditionalPreconditions() =>
     new[]
     {
         AnalyzersExt.HasColumn(ColumnA),
         AnalyzersExt.IsNumeric(ColumnA),
         AnalyzersExt.HasColumn(ColumnB),
         AnalyzersExt.IsNumeric(ColumnB)
     };
Ejemplo n.º 4
0
        /// <inheritdoc cref="ScanShareableAnalyzer{S,M}.AggregationFunctions"/>
        /// <remarks>
        /// Returns two aggregations: the sum of a 0/1 match flag and the conditional row count.
        /// </remarks>
        public override IEnumerable<Column> AggregationFunctions()
        {
            // regexp_extract with group index 0; a non-empty extract is flagged as 1, anything else as 0.
            Column extracted = RegexpExtract(Column(Column.GetOrElse(string.Empty)), Regex.ToString(), 0);
            Column matchFlag = When(extracted != Lit(""), 1).Otherwise(0);

            Column matchCount = Sum(AnalyzersExt.ConditionalSelection(matchFlag, Where).Cast("integer"));
            Column rowCount = AnalyzersExt.ConditionalCount(Where);

            return new[] { matchCount, rowCount };
        }
Ejemplo n.º 5
0
        /// <inheritdoc cref="ScanShareableAnalyzer{S,M}.AggregationFunctions"/>
        /// <remarks>
        /// Returns two aggregations: the sum of the "is not null" flag (cast to int) of the selected
        /// column, and the conditional row count.
        /// </remarks>
        public override IEnumerable<Column> AggregationFunctions()
        {
            var selected = AnalyzersExt.ConditionalSelection(Column, Where);

            // Each non-null cell contributes 1 to the sum.
            Column nonNullCount = Sum(selected.IsNotNull().Cast("int"));
            Column rowCount = AnalyzersExt.ConditionalCount(Where);

            return new[] { nonNullCount, rowCount };
        }
Ejemplo n.º 6
0
        /// <inheritdoc cref="StandardScanShareableAnalyzer{S}.FromAggregationResult"/>
        /// <remarks>
        /// Yields the empty metric when <paramref name="result"/> is <c>null</c>, when it has no cell at
        /// <paramref name="offset"/>, or when that cell is <c>null</c>; otherwise the cell is read as a
        /// <see cref="double"/> and wrapped into a success metric.
        /// </remarks>
        /// <param name="result">The aggregation result row; may be <c>null</c> when no row was produced.</param>
        /// <param name="offset">The index of the cell holding the metric value.</param>
        /// <returns>The success metric for the value at <paramref name="offset"/>, or the empty metric.</returns>
        public virtual DoubleMetric FromAggregationResult(Row result, int offset)
        {
            // The null check comes first so that a missing row is reported as an empty metric
            // instead of failing with a NullReferenceException on result.Values.
            if (result is null || result.Values.Length <= offset || result[offset] == null)
            {
                return AnalyzersExt.MetricFromEmpty(this, Name, string.Join(',', Columns),
                    AnalyzersExt.EntityFrom(Columns));
            }

            return ToSuccessMetric(result.GetAs<double>(offset));
        }
Ejemplo n.º 7
0
        /// <inheritdoc cref="Analyzer{S,M}.ComputeMetricFrom"/>
        /// <remarks>
        /// Without a state the empty metric is returned; otherwise the state's metric value is wrapped
        /// into a <c>Try</c> and converted into a metric.
        /// </remarks>
        public override DoubleMetric ComputeMetricFrom(Option<S> state)
        {
            if (!state.HasValue)
            {
                return AnalyzersExt.MetricFromEmpty(this, Name, Instance, _metricEntity);
            }

            var metricValue = new Try<double>(state.Value.GetMetricValue());

            return AnalyzersExt.MetricFromValue(metricValue, Name, Instance, _metricEntity);
        }
    }
Ejemplo n.º 8
0
        /// <inheritdoc cref="Analyzer{S,M}.ComputeMetricFrom"/>
        /// <remarks>
        /// Uses the first two entries of <c>Columns</c>. The joint frequency table is joined with the
        /// per-column marginal frequencies and the terms <c>p(x,y) * ln(p(x,y) / (p(x) * p(y)))</c> are
        /// summed, where every probability is the respective count divided by the number of rows.
        /// The empty metric is returned when there is no state or the aggregated sum is <c>null</c>.
        /// </remarks>
        /// <param name="state">The optional joint frequencies and total row count.</param>
        /// <returns>The "MutualInformation" metric, or the empty metric when no value is available.</returns>
        public override DoubleMetric ComputeMetricFrom(Option<FrequenciesAndNumRows> state)
        {
            if (!state.HasValue)
            {
                return AnalyzersExt.MetricFromEmpty(this, "MutualInformation", string.Join(',', Columns),
                    MetricEntity.Multicolumn);
            }

            long total = state.Value.NumRows;
            string col1 = Columns.First();
            string col2 = Columns.Skip(1).First();

            // Temporary column names for the marginal frequencies of each column.
            string freqCol1 = $"__deequ_f1_{col1}";
            string freqCol2 = $"__deequ_f2_{col2}";

            DataFrame jointStats = state.Value.Frequencies;

            DataFrame marginalStats1 = jointStats
                .Select(col1, AnalyzersExt.COUNT_COL)
                .GroupBy(col1)
                .Agg(Sum(AnalyzersExt.COUNT_COL).As(freqCol1));

            DataFrame marginalStats2 = jointStats
                .Select(col2, AnalyzersExt.COUNT_COL)
                .GroupBy(col2)
                .Agg(Sum(AnalyzersExt.COUNT_COL).As(freqCol2));

            // Contribution of a single (x, y) pair; px, py and pxy are raw counts that get
            // normalized by the total number of rows.
            Func<Column, Column, Column, Column> miUdf = Udf((double px, double py, double pxy) =>
                pxy / total * Math.Log(pxy / total / (px / total * (py / total))));

            // C# interpolation holes are written {name}. The former "${col1}_$col2" kept literal '$'
            // characters and the text "col2" in the name instead of the second column's name.
            string miCol = $"__deequ_mi_{col1}_{col2}";

            DataFrame value = jointStats
                .Join(marginalStats1, col1)
                .Join(marginalStats2, col2)
                .WithColumn(miCol,
                    miUdf(Col(freqCol1).Cast("double"), Col(freqCol2).Cast("double"),
                        Col(AnalyzersExt.COUNT_COL).Cast("double")))
                .Agg(Sum(miCol));

            Row resultRow = value.First();

            if (resultRow[0] == null)
            {
                return AnalyzersExt.MetricFromEmpty(this, "MutualInformation", string.Join(',', Columns),
                    MetricEntity.Multicolumn);
            }

            return AnalyzersExt.MetricFromValue(resultRow.GetAs<double>(0), "MutualInformation",
                string.Join(',', Columns),
                MetricEntity.Multicolumn);
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Computes a <see cref="Metric{T}"/> from a <see cref="State{T}"/>, merging it with a previously
        /// stored state when a loader is given and handing the merged state to a persister when one is given.
        /// </summary>
        /// <param name="state">The <see cref="State{T}"/> to compute the metric from.</param>
        /// <param name="aggregateWith">The optional <see cref="IStateLoader"/> from which a previous state is loaded and merged with <paramref name="state"/>.</param>
        /// <param name="saveStateWith">The optional <see cref="IStatePersister"/> that receives the merged state.</param>
        /// <returns>The result of <c>ComputeMetricFrom</c> applied to the merged state.</returns>
        public M CalculateMetric(Option<S> state, Option<IStateLoader> aggregateWith,
            Option<IStatePersister> saveStateWith)
        {
            // Key under which this analyzer's state is loaded and persisted; built on demand inside
            // the callbacks so nothing is created when the respective option is empty.
            Option<IAnalyzer<IMetric>> AnalyzerKey() =>
                new Option<IAnalyzer<IMetric>>((IAnalyzer<IMetric>) this);

            Option<S> previousState = aggregateWith
                .Select(loader => loader.Load<S>(AnalyzerKey()).Value);

            Option<S> mergedState = AnalyzersExt.Merge(previousState, state);

            // The projection result is discarded; Select is used only to run Persist when a persister exists.
            // NOTE(review): this relies on Option.Select invoking the callback eagerly - confirm.
            saveStateWith
                .Select(persister => persister.Persist(AnalyzerKey(), mergedState));

            return ComputeMetricFrom(mergedState);
        }
Ejemplo n.º 10
0
        /// <inheritdoc cref="ScanShareableAnalyzer{S,M}.AggregationFunctions" />
        /// <remarks>
        /// Emits six aggregations in the order n, sumX, sumY, sumXY, sumX2, sumY2, where X is the
        /// conditional selection of <c>ColumnA</c> and Y the one of <c>ColumnB</c>.
        /// Formula reference: https://mathoverflow.net/a/57914
        /// </remarks>
        public override IEnumerable<Column> AggregationFunctions()
        {
            var x = AnalyzersExt.ConditionalSelection(ColumnA, Where);
            var y = AnalyzersExt.ConditionalSelection(ColumnB, Where);

            return new[]
            {
                Count(x),   // n
                Sum(x),     // sumX
                Sum(y),     // sumY
                Sum(x * y), // sumXY
                Sum(x * x), // sumX2
                Sum(y * y)  // sumY2
            };
        }
Ejemplo n.º 11
0
        /// <inheritdoc cref="ScanShareableAnalyzer{S,M}.ComputeMetricFrom"/>
        /// <remarks>
        /// Without a state the empty metric is returned. Otherwise the aggregation functions for the
        /// state's row count are applied to the frequency table and the first collected row (or
        /// <c>null</c> when nothing was collected) is passed to <c>FromAggregationResult</c> with offset 0.
        /// </remarks>
        /// <param name="state">The optional frequencies and total row count.</param>
        /// <returns>The metric derived from the aggregated frequencies, or the empty metric.</returns>
        public override DoubleMetric ComputeMetricFrom(Option<FrequenciesAndNumRows> state)
        {
            if (!state.HasValue)
            {
                return AnalyzersExt.MetricFromEmpty(this, Name, string.Join(',', Columns),
                    AnalyzersExt.EntityFrom(Columns));
            }

            FrequenciesAndNumRows frequenciesAndNumRows = state.Value;

            // Materialized once: the sequence is split into head and tail below, which would otherwise
            // enumerate (and possibly rebuild) the aggregation columns twice.
            Column[] aggregations = AggregationFunctions(frequenciesAndNumRows.NumRows).ToArray();

            Row result = frequenciesAndNumRows.Frequencies
                .Agg(aggregations.First(), aggregations.Skip(1).ToArray())
                .Collect()
                .FirstOrDefault();

            return FromAggregationResult(result, 0);
        }
Ejemplo n.º 12
0
 /// <inheritdoc cref="FrequencyBasedAnalyzer.Preconditions"/>
 /// <remarks>
 /// The check that <c>Columns</c> has exactly two entries comes first, followed by the inherited preconditions.
 /// </remarks>
 public override IEnumerable<Action<StructType>> Preconditions()
 {
     var exactlyTwoColumns = AnalyzersExt.ExactlyNColumns(Columns, 2);

     return exactlyTwoColumns.Concat(base.Preconditions());
 }
Ejemplo n.º 13
0
 /// <inheritdoc cref="StandardScanShareableAnalyzer{S}.AdditionalPreconditions"/>
 /// <remarks>
 /// Produces two checks: the column's presence, then a string check on the column name
 /// (the empty string is used when <c>Column</c> holds no value).
 /// </remarks>
 public override IEnumerable<Action<StructType>> AdditionalPreconditions()
 {
     var columnExists = AnalyzersExt.HasColumn(Column);
     var columnIsString = AnalyzersExt.IsString(Column.GetOrElse(string.Empty));

     return new[] { columnExists, columnIsString };
 }
Ejemplo n.º 14
0
 /// <inheritdoc cref="ScanShareableAnalyzer{S,M}.FromAggregationResult"/>
 /// <remarks>
 /// Reads two consecutive cells starting at <paramref name="offset"/>, each unboxed as <see cref="int"/>.
 /// </remarks>
 protected override Option<NumMatchesAndCount> FromAggregationResult(Row result, int offset)
 {
     // The trailing 2 is forwarded to IfNoNullsIn; presumably the number of cells to null-check - confirm.
     return AnalyzersExt.IfNoNullsIn(
         result,
         offset,
         () =>
         {
             var first = (int)result.Get(offset);
             var second = (int)result.Get(offset + 1);

             return new NumMatchesAndCount(first, second);
         },
         2);
 }
Ejemplo n.º 15
0
 /// <inheritdoc cref="StandardScanShareableAnalyzer{S}.AdditionalPreconditions"/>
 /// <remarks>
 /// Produces two checks for <c>Column</c>: its presence, then the "not nested" check.
 /// </remarks>
 public override IEnumerable<Action<StructType>> AdditionalPreconditions()
 {
     var columnExists = AnalyzersExt.HasColumn(Column);
     var columnIsFlat = AnalyzersExt.IsNotNested(Column);

     return new[] { columnExists, columnIsFlat };
 }
Ejemplo n.º 16
0
 /// <inheritdoc cref="StandardScanShareableAnalyzer{S}.FromAggregationResult"/>
 /// <remarks>
 /// Reads the cell at <paramref name="offset"/> as a <see cref="double"/> and wraps it in a <c>SumState</c>.
 /// </remarks>
 protected override Option<SumState> FromAggregationResult(Row result, int offset)
 {
     return AnalyzersExt.IfNoNullsIn(result, offset, () =>
     {
         double sum = result.GetAs<double>(offset);

         return new SumState(sum);
     });
 }
Ejemplo n.º 17
0
 /// <inheritdoc cref="StandardScanShareableAnalyzer{S}.AggregationFunctions"/>
 /// <remarks>
 /// Returns a single aggregation: the sum of the conditionally selected column, cast to double.
 /// </remarks>
 public override IEnumerable<Column> AggregationFunctions()
 {
     Column total = Sum(AnalyzersExt.ConditionalSelection(Column, Where)).Cast("double");

     return new[] { total };
 }
Ejemplo n.º 18
0
        /// <inheritdoc cref="ScanShareableAnalyzer{S,M}.AggregationFunctions"/>
        /// <remarks>
        /// Returns two aggregations: the sum of the conditionally selected predicate cast to int,
        /// and the conditional row count.
        /// </remarks>
        public override IEnumerable<Column> AggregationFunctions()
        {
            var predicateAsInt = AnalyzersExt.ConditionalSelection(Predicate, Where).Cast("int");

            Column matchCount = Sum(predicateAsInt);
            Column rowCount = AnalyzersExt.ConditionalCount(Where);

            return new[] { matchCount, rowCount };
        }
Ejemplo n.º 19
0
 /// <inheritdoc cref="GroupingAnalyzer{S,M}.Preconditions"/>
 /// <remarks>
 /// Order of the resulting checks: at least one column, presence of every column,
 /// "not nested" for every column, then the inherited preconditions.
 /// </remarks>
 public override IEnumerable<Action<StructType>> Preconditions()
 {
     var atLeastOneColumn = new[] { AnalyzersExt.AtLeastOne(Columns) };
     var presenceChecks = Columns.Select(AnalyzersExt.HasColumn);
     var nestingChecks = Columns.Select(AnalyzersExt.IsNotNested);

     return atLeastOneColumn
         .Concat(presenceChecks)
         .Concat(nestingChecks)
         .Concat(base.Preconditions());
 }
Ejemplo n.º 20
0
 /// <inheritdoc cref="ScanShareableAnalyzer{S,M}.FromAggregationResult"/>
 /// <remarks>
 /// Reads the cell at <paramref name="offset"/> unboxed as <see cref="double"/> and the following cell
 /// unboxed as <see cref="int"/>, and passes both to the <c>MeanState</c> constructor.
 /// </remarks>
 protected override Option<MeanState> FromAggregationResult(Row result, int offset)
 {
     // The trailing 2 is forwarded to IfNoNullsIn; presumably the number of cells to null-check - confirm.
     return AnalyzersExt.IfNoNullsIn(
         result,
         offset,
         () =>
         {
             var first = (double)result.Get(offset);
             var second = (int)result.Get(offset + 1);

             return new MeanState(first, second);
         },
         2);
 }
Ejemplo n.º 21
0
 /// <inheritdoc cref="Analyzer{S,M}.ToFailureMetric"/>
 /// <remarks>
 /// The failure metric carries this analyzer's <c>Name</c>, <c>Instance</c> and metric entity.
 /// </remarks>
 public override DoubleMetric ToFailureMetric(Exception e)
 {
     return AnalyzersExt.MetricFromFailure(e, Name, Instance, _metricEntity);
 }
Ejemplo n.º 22
0
 /// <inheritdoc cref="ScanShareableAnalyzer{S,M}.FromAggregationResult"/>
 /// <remarks>
 /// Unboxes every cell of <paramref name="result"/> as <see cref="int"/> and builds the histogram from
 /// the resulting array. NOTE(review): all cells of the row are converted, independent of
 /// <paramref name="offset"/> - confirm this is intended.
 /// </remarks>
 protected override Option<DataTypeHistogram> FromAggregationResult(Row result, int offset)
 {
     return AnalyzersExt.IfNoNullsIn(
         result,
         offset,
         () => DataTypeHistogram.FromArray(result.Values.Select(cell => (int)cell).ToArray()));
 }
Ejemplo n.º 23
0
 /// <inheritdoc cref="StandardScanShareableAnalyzer{S}.AggregationFunctions"/>
 /// <remarks>
 /// Returns a single aggregation: the conditional row count for <c>Where</c>.
 /// </remarks>
 public override IEnumerable<Column> AggregationFunctions()
 {
     Column rowCount = AnalyzersExt.ConditionalCount(Where);

     return new[] { rowCount };
 }
Ejemplo n.º 24
0
 /// <summary>
 /// Wraps an <see cref="Exception"/> into a failed <see cref="DoubleMetric"/>.
 /// </summary>
 /// <param name="exception">The exception the metric should report.</param>
 /// <returns>
 /// A <see cref="DoubleMetric"/> built from the exception, this analyzer's name, the comma-joined
 /// column names and the entity derived from the columns.
 /// </returns>
 public override DoubleMetric ToFailureMetric(Exception exception)
 {
     return AnalyzersExt.MetricFromFailure(
         exception,
         Name,
         string.Join(',', Columns),
         AnalyzersExt.EntityFrom(Columns));
 }
Ejemplo n.º 25
0
 /// <summary>
 /// Wraps a computed value into a <see cref="DoubleMetric"/>.
 /// </summary>
 /// <param name="value">The value the metric should carry.</param>
 /// <returns>
 /// A <see cref="DoubleMetric"/> built from the value, this analyzer's name, the comma-joined
 /// column names and the entity derived from the columns.
 /// </returns>
 protected DoubleMetric ToSuccessMetric(double value)
 {
     return AnalyzersExt.MetricFromValue(
         value,
         Name,
         string.Join(',', Columns),
         AnalyzersExt.EntityFrom(Columns));
 }
Ejemplo n.º 26
0
 /// <inheritdoc cref="StandardScanShareableAnalyzer{S}.FromAggregationResult"/>
 /// <remarks>
 /// Reads the cell at <paramref name="offset"/> as an <see cref="int"/> and wraps it in a <c>NumMatches</c>.
 /// </remarks>
 protected override Option<NumMatches> FromAggregationResult(Row result, int offset)
 {
     return AnalyzersExt.IfNoNullsIn(result, offset, () =>
     {
         int matches = result.GetAs<int>(offset);

         return new NumMatches(matches);
     });
 }
Ejemplo n.º 27
0
        /// <inheritdoc cref="StandardScanShareableAnalyzer{S}.AggregationFunctions"/>
        /// <remarks>
        /// Returns a single struct column bundling count, average and population standard deviation
        /// of the conditionally selected column expression.
        /// </remarks>
        public override IEnumerable<Column> AggregationFunctions()
        {
            // The column name is parsed as an expression; the empty string is used when no column is set.
            Column columnExpression = Expr(Column.GetOrElse(string.Empty));
            Column selected = AnalyzersExt.ConditionalSelection(columnExpression, Where);

            Column statistics = Struct(Count(selected), Avg(selected), StddevPop(selected));

            return new[] { statistics };
        }
Ejemplo n.º 28
0
 /// <inheritdoc cref="Analyzer{S,M}.ToFailureMetric"/>
 /// <remarks>
 /// The failure metric is named "MutualInformation", uses the comma-joined column names as its
 /// instance and is reported as a multi-column entity.
 /// </remarks>
 public override DoubleMetric ToFailureMetric(Exception e)
 {
     return AnalyzersExt.MetricFromFailure(
         e,
         "MutualInformation",
         string.Join(',', Columns),
         MetricEntity.Multicolumn);
 }
Ejemplo n.º 29
0
        /// <summary>
        /// Returns two columns: the conditional selection of <c>Predicate</c> under <c>Where</c>,
        /// followed by <c>Count("*")</c>.
        /// </summary>
        /// <returns>The two columns, in that order.</returns>
        public override IEnumerable<Column> AggregationFunctions()
        {
            Column predicateSelection = AnalyzersExt.ConditionalSelection(Predicate, Where);
            Column rowCount = Count("*");

            return new[] { predicateSelection, rowCount };
        }
Ejemplo n.º 30
0
 /// <inheritdoc cref="ScanShareableAnalyzer{S,M}.AggregationFunctions"/>
 /// <remarks>
 /// Returns a single column: the conditional selection of <c>Column</c> under <c>Where</c>.
 /// </remarks>
 public override IEnumerable<Column> AggregationFunctions()
 {
     var selection = AnalyzersExt.ConditionalSelection(Column, Where);

     return new[] { selection };
 }