Exemplo n.º 1
0
        /// <summary>
        /// Runs a <c>TextColumnTrim</c> query (<c>TextColumnTrimType.Both</c>, <c>Constants.EmailAddressChars</c>)
        /// over the context's table and column and inspects the returned rows.
        /// </summary>
        /// <param name="conn">The connection used to execute the query.</param>
        /// <param name="ctx">The context supplying the table and column to query.</param>
        /// <returns>
        /// <c>true</c> when every returned row is either null, or is not suppressed and has the value "@";
        /// <c>false</c> otherwise.
        /// </returns>
        private static async Task <bool> CheckIsEmail(DConnection conn, ExplorerContext ctx)
        {
            var trimQuery = new TextColumnTrim(
                ctx.Table,
                ctx.Column,
                TextColumnTrimType.Both,
                Constants.EmailAddressChars);
            var trimmed = await conn.Exec(trimQuery);

            foreach (var row in trimmed.Rows)
            {
                if (row.IsNull)
                {
                    // Null rows never disqualify the column.
                    continue;
                }
                if (row.IsSuppressed || row.Value != "@")
                {
                    return false;
                }
            }

            return true;
        }
Exemplo n.º 2
0
        /// <summary>
        /// Matches the first character of the context's text when it lies within the inclusive
        /// range [<c>First</c>, <c>Last</c>]. When <c>IsCaseSensitive</c> is false, the character and both
        /// bounds are upper-cased (invariant culture) before comparing.
        /// </summary>
        /// <param name="context">The context whose text is examined.</param>
        /// <returns>
        /// A single one-character match, or <c>RuleMatch.EmptyMatch</c> when the text has no content or the
        /// first character is out of range.
        /// </returns>
        protected override IEnumerable <RuleMatch> OnMatch(ExplorerContext context)
        {
            var text = context.Text;

            if (!text.HasContent)
            {
                return RuleMatch.EmptyMatch;
            }

            var ignoreCase = IsCaseSensitive == false;
            var head       = text.First();
            var candidate  = ignoreCase ? char.ToUpperInvariant(head) : head;
            var lowerBound = ignoreCase ? char.ToUpperInvariant(First) : First;
            var upperBound = ignoreCase ? char.ToUpperInvariant(Last) : Last;

            if (candidate < lowerBound || candidate > upperBound)
            {
                return RuleMatch.EmptyMatch;
            }

            var matchText = text.Take(1);

            return new[]
            {
                new RuleMatch(
                    this,
                    matchText,
                    () => RuleOutput.ComputeOutput(matchText, new Lazy <object?>(matchText))),
            };
        }
Exemplo n.º 3
0
        /// <summary>
        /// Drills down into successive column combinations, combining the results.
        /// <para>
        /// For example: We have columns {A, B, C, D}
        /// First, this will get counts for {A}, {B}, {C}, {D}.
        /// then: {A, B}, {A, C}, {A, D} - ONLY if {A} returned no suppressed columns
        /// then: {B, A}, {B, C}, {B, D} - ONLY if {B} returned no suppressed columns
        /// etc...
        /// then: {A, B, C, D} - ONLY if {A, B, C} returned no suppressed columns
        /// then: {A, B, D, C} - ONLY if {A, B, D} returned no suppressed columns
        /// etc.
        /// </para>
        /// <para>
        /// Thus, it 'searches' the data sets, grouping by ever increasing column combinations, until it reaches a
        /// 'dead-end' where the combination is suppressed.
        /// </para>
        /// <para>
        /// The root-level query is always executed. Deeper levels are only queried while the level number is
        /// below the effective maximum and the previous level produced at least one row.
        /// </para>
        /// </summary>
        /// <param name="context">An <see cref="ExplorerContext" /> containing the query execution method.</param>
        /// <param name="projections">
        /// A list of <see cref="ColumnProjection" />s defining how to segment the columns into buckets.
        /// </param>
        /// <param name="maxLevel">
        /// The maximum number of columns to include in a subgrouping, or null for all columns. Values larger
        /// than the number of projections are clamped to it; values below one yield the root level only.
        /// </param>
        /// <returns>A Task that resolves to a list of query result rows.</returns>
        public static async Task <IEnumerable <MultiColumnCounts.Result> > DrillDown(
            ExplorerContext context,
            IEnumerable <ColumnProjection> projections,
            int?maxLevel = null)
        {
            // Count once: the sequence may be lazily evaluated, so every Count() call could re-enumerate it.
            var projectionCount = projections.Count();
            var numLevels       = Math.Min(maxLevel ?? projectionCount, projectionCount);

            // The root level is always stored, so reserve at least one slot. This also keeps the
            // capacity non-negative when maxLevel is zero or negative.
            var allLevels = new List <IEnumerable <MultiColumnCounts.Result> >(Math.Max(numLevels, 1));

            var rootLevel = await context.Exec(new MultiColumnCountsPartial(projections));

            allLevels.Add(rootLevel.Rows);

            // A plain for loop (rather than Enumerable.Range(1, numLevels - 1)) simply does nothing when
            // numLevels is below one, instead of throwing ArgumentOutOfRangeException for a negative count.
            for (var depth = 1; depth < numLevels; depth++)
            {
                var currentLevel = allLevels[depth - 1];

                // Materialise once so the emptiness check and the stored level share a single enumeration.
                var nextLevel = (await DrillDownNextLevel(context, projections, currentLevel, depth)).ToList();

                if (nextLevel.Count == 0)
                {
                    break;
                }
                allLevels.Add(nextLevel);
            }

            return allLevels.Flatten();
        }
Exemplo n.º 4
0
        /// <summary>
        /// Collects substrings of the specified column, for each requested substring length.
        /// Positions are queried in batches: each <c>TextColumnSubstring</c> query is given a starting
        /// position and <paramref name="substringQueryColumnCount" />, and the starting position advances
        /// by that amount until a query returns no row with a value.
        /// </summary>
        /// <param name="conn">The connection used to execute the queries.</param>
        /// <param name="ctx">The context supplying the table and column to query.</param>
        /// <param name="substringQueryColumnCount">The batch size passed to each query.</param>
        /// <param name="substringLengths">The substring lengths to explore, one pass per length.</param>
        /// <returns>The accumulated substring data, keyed by position (batch start plus row index).</returns>
        private static async Task <SubstringsData> ExploreSubstrings(
            DConnection conn,
            ExplorerContext ctx,
            int substringQueryColumnCount,
            params int[] substringLengths)
        {
            var collected = new SubstringsData();

            foreach (var length in substringLengths)
            {
                var batchStart = 0;
                bool foundValue;

                // At least one batch is always queried per length; stop after the first batch without values.
                do
                {
                    var batchQuery = new TextColumnSubstring(
                        ctx.Table,
                        ctx.Column,
                        batchStart,
                        length,
                        substringQueryColumnCount);
                    var batch = await conn.Exec(batchQuery);

                    foundValue = false;
                    foreach (var row in batch.Rows)
                    {
                        if (!row.HasValue)
                        {
                            continue;
                        }

                        foundValue = true;
                        collected.Add(batchStart + row.Index, row.Value, row.Count);
                    }

                    batchStart += substringQueryColumnCount;
                }
                while (foundValue);
            }

            return collected;
        }
Exemplo n.º 5
0
 /// <summary>
 /// Creates the component, storing the supplied distribution provider and exploration context.
 /// </summary>
 /// <param name="distributionProvider">Provider of the <c>DatetimeDistribution</c> result.</param>
 /// <param name="context">The exploration context.</param>
 public DatetimeGeneratorComponent(
     ResultProvider <DatetimeDistribution> distributionProvider,
     ExplorerContext context)
     => (this.distributionProvider, this.context) = (distributionProvider, context);
Exemplo n.º 6
0
            /// <summary>
            /// Combines this context with <paramref name="other" /> into a new <c>CheckedContext</c> whose
            /// columns and column infos are the distinct union of both.
            /// </summary>
            /// <param name="other">The context to merge with; must be a <c>CheckedContext</c>.</param>
            /// <returns>A new context sharing this context's connection, data source, table and sample count.</returns>
            /// <exception cref="ArgumentException">
            /// <paramref name="other" /> is not a <c>CheckedContext</c>, or differs in connection (by reference),
            /// data source, table (both ordinal comparisons) or samples to publish.
            /// </exception>
            public ExplorerContext Merge(ExplorerContext other)
            {
                var checkedOther = other is CheckedContext sameKind
                    ? sameKind
                    : throw new ArgumentException("Cannot merge two contexts of different concrete type.");

                var sameConnection = ReferenceEquals(Connection, checkedOther.Connection);
                if (!sameConnection)
                {
                    throw new ArgumentException("Cannot merge two contexts with different Connections.");
                }

                var sameDataSource = string.Equals(DataSource, other.DataSource, StringComparison.Ordinal);
                if (!sameDataSource)
                {
                    throw new ArgumentException("Cannot merge two contexts with different DataSources.");
                }

                var sameTable = string.Equals(Table, other.Table, StringComparison.Ordinal);
                if (!sameTable)
                {
                    throw new ArgumentException("Cannot merge two contexts with different Tables.");
                }

                if (SamplesToPublish != other.SamplesToPublish)
                {
                    throw new ArgumentException("Cannot merge two contexts with different SamplesToPublish.");
                }

                // Union of both sides with duplicates removed.
                var mergedColumns     = Columns.AddRange(other.Columns).Distinct();
                var mergedColumnInfos = ColumnInfos.AddRange(other.ColumnInfos).Distinct();

                return new CheckedContext(
                    Connection,
                    DataSource,
                    Table,
                    mergedColumns,
                    mergedColumnInfos,
                    SamplesToPublish);
            }
Exemplo n.º 7
0
 /// <summary>
 /// Creates the generator, storing the supplied exploration context and distribution provider.
 /// </summary>
 /// <param name="ctx">The exploration context.</param>
 /// <param name="distributionProvider">Provider of the <c>NumericDistribution</c> result.</param>
 public NumericSampleGenerator(
     ExplorerContext ctx,
     ResultProvider <NumericDistribution> distributionProvider)
     => (this.distributionProvider, this.ctx) = (distributionProvider, ctx);
Exemplo n.º 8
0
        /// <summary>
        /// Yields the matches of the primary rule, dropping any match for which the excluded rule
        /// (invoked on a sub-context limited to the primary match's length) produces a match of exactly
        /// the same length.
        /// </summary>
        /// <param name="context">The context to match against.</param>
        /// <returns>The surviving primary matches, re-wrapped as matches of this rule.</returns>
        protected override IEnumerable <RuleMatch> OnMatch(ExplorerContext context)
        {
            foreach (var candidate in context.InvokeRule(_primary))
            {
                var candidateText = candidate.Text;

                var isExcluded = context
                                 .SubContext(candidateText.Length)
                                 .InvokeRule(_excluded)
                                 .Any(excluded => excluded.Text.Length == candidateText.Length);

                if (isExcluded)
                {
                    continue;
                }

                yield return new RuleMatch(
                    this,
                    candidateText,
                    () => RuleOutput.ComputeOutput(
                        candidateText,
                        new Lazy <object?>(() => candidate.ComputeOutput())));
            }
        }
Exemplo n.º 9
0
        /// <summary>
        /// Tries each rule in turn and yields every match it produces, re-wrapped as a match of this rule.
        /// When all rules have names (<c>_rules.DoAllHaveNames</c>), the match output is wrapped through
        /// <c>MakeMap</c> with the rule's tag; otherwise the inner match output is passed through unchanged.
        /// </summary>
        /// <param name="context">The context to match against.</param>
        /// <returns>The matches of all rules, in rule order.</returns>
        protected override IEnumerable <RuleMatch> OnMatch(ExplorerContext context)
        {
            foreach (var rule in _rules)
            {
                foreach (var candidate in context.InvokeRule(rule.Rule))
                {
                    // Only the way the output is produced differs between the named and unnamed cases.
                    Func <object?> produceOutput;

                    if (_rules.DoAllHaveNames)
                    {
                        produceOutput = () => MakeMap(rule.Tag, candidate.ComputeOutput());
                    }
                    else
                    {
                        produceOutput = () => candidate.ComputeOutput();
                    }

                    yield return new RuleMatch(
                        this,
                        candidate.Text,
                        () => RuleOutput.ComputeOutput(
                            candidate.Text,
                            new Lazy <object?>(produceOutput)));
                }
            }
        }
Exemplo n.º 10
0
        /// <summary>
        /// Yields every match found by <c>RecurseMatch</c> starting at the context's text, followed by a
        /// zero-length match when no minimum is set or the minimum is zero.
        /// </summary>
        /// <param name="context">The context to match against.</param>
        /// <returns>The recursive matches, then optionally the empty match.</returns>
        protected override IEnumerable <RuleMatch> OnMatch(ExplorerContext context)
        {
            var noChildren = ImmutableList <RuleMatch> .Empty;

            foreach (var found in RecurseMatch(context.ContextID, context, context.Text, 0, 1, noChildren))
            {
                yield return found;
            }

            //  Matches are returned in decreasing order of text length, so the empty one goes last
            var zeroRepeatsAllowed = (_min ?? 0) == 0;

            if (zeroRepeatsAllowed)
            {
                var emptyText = context.Text.Take(0);

                yield return new RuleMatch(
                    this,
                    emptyText,
                    () => RuleOutput.ComputeOutput(
                        emptyText,
                        new Lazy <object?>(ImmutableArray <object> .Empty)));
            }
        }
Exemplo n.º 11
0
 /// <summary>
 /// Creates a test context by copying the data source, table, column and column type from
 /// <paramref name="ctx" />.
 /// </summary>
 /// <param name="ctx">The context to copy from.</param>
 public ExplorerTestContext(ExplorerContext ctx)
     => (DataSource, Table, Column, ColumnType) = (ctx.DataSource, ctx.Table, ctx.Column, ctx.ColumnType);
Exemplo n.º 12
0
 /// <summary>
 /// Creates the generator, storing the supplied exploration context and correlation provider.
 /// </summary>
 /// <param name="correlationProvider">Provider of the <c>ColumnCorrelationComponent.Result</c>.</param>
 /// <param name="context">The exploration context.</param>
 public CorrelatedSampleGenerator(
     ResultProvider <ColumnCorrelationComponent.Result> correlationProvider,
     ExplorerContext context)
     => (this.context, this.correlationProvider) = (context, correlationProvider);
Exemplo n.º 13
0
        /// <summary>
        /// Creates an <c>ExplorationScope</c> over the given container, registers the context with it and
        /// then applies this builder's <c>Configure</c> step.
        /// </summary>
        /// <param name="scope">The nested container the new scope wraps.</param>
        /// <param name="context">The exploration context to use and configure for.</param>
        /// <returns>The configured exploration scope.</returns>
        public ExplorationScope Build(INestedContainer scope, ExplorerContext context)
        {
            var built = new ExplorationScope(scope);

            // The context is registered before Configure runs.
            built.UseContext(context);
            Configure(built, context);

            return built;
        }
Exemplo n.º 14
0
 /// <summary>
 /// Configures a column exploration inside a fresh nested container obtained from the root container.
 /// </summary>
 /// <param name="conn">A DConnection configured for the Api backend.</param>
 /// <param name="ctx">An <see cref="ExplorerContext" /> defining the exploration parameters.</param>
 /// <param name="componentConfiguration">
 /// An action to add and configure the components to use in this exploration.
 /// </param>
 /// <returns>A new ColumnExploration object.</returns>
 public ColumnExploration LaunchColumnExploration(
     DConnection conn,
     ExplorerContext ctx,
     Action <ExplorationConfig> componentConfiguration)
 {
     // This scope (and all the components resolved within) should live until the end of the Task.
     var explorationScope = rootContainer.GetNestedContainer();

     return ExploreColumn(explorationScope, conn, ctx, componentConfiguration);
 }
Exemplo n.º 15
0
 private IEnumerable <RuleMatch> RecurseMatch(
     IEnumerable <TaggedRule> rules,
     //  Used only for debugging purposes, to hook on the context ID of the entire sequence
     int masterContextID,
     ExplorerContext context,
     SubString originalText,
     int totalMatchLength,
     ImmutableList <(TaggedRule rule, RuleMatch match)> matches)
Exemplo n.º 16
0
 /// <summary>
 /// Creates the component, storing its dependencies and initialising the tunable settings to their
 /// default values.
 /// </summary>
 /// <param name="conn">The connection to store.</param>
 /// <param name="ctx">The exploration context to store.</param>
 /// <param name="emailChecker">The email check component to store.</param>
 public TextGeneratorComponent(DConnection conn, ExplorerContext ctx, EmailCheckComponent emailChecker)
 {
     // Dependencies.
     (this.conn, this.ctx, this.emailChecker) = (conn, ctx, emailChecker);

     // Settings start at their defaults.
     GeneratedValuesCount       = DefaultGeneratedValuesCount;
     EmailDomainsCountThreshold = DefaultEmailDomainsCountThreshold;
     SubstringQueryColumnCount  = DefaultSubstringQueryColumnCount;
 }
Exemplo n.º 17
0
 /// <summary>
 /// Creates the component, storing the supplied connection, exploration context and stats provider.
 /// </summary>
 /// <param name="conn">The connection to store.</param>
 /// <param name="ctx">The exploration context to store.</param>
 /// <param name="statsResultProvider">Provider of the <c>SimpleStats&lt;double&gt;.Result</c>.</param>
 public NumericHistogramComponent(
     DConnection conn,
     ExplorerContext ctx,
     ResultProvider <SimpleStats <double> .Result> statsResultProvider)
     => (this.conn, this.ctx, this.statsResultProvider) = (conn, ctx, statsResultProvider);
Exemplo n.º 18
0
 /// <summary>
 /// Creates the component, storing the supplied connection, exploration context and isolator check
 /// provider.
 /// </summary>
 /// <param name="conn">The connection to store.</param>
 /// <param name="ctx">The exploration context to store.</param>
 /// <param name="isolatorCheck">Provider of the <c>IsolatorCheckComponent.Result</c>.</param>
 public TextLengthComponent(
     DConnection conn,
     ExplorerContext ctx,
     ResultProvider <IsolatorCheckComponent.Result> isolatorCheck)
     => (this.ctx, this.conn, this.isolatorCheck) = (ctx, conn, isolatorCheck);
Exemplo n.º 19
0
 /// <summary>
 /// Starts the recursive match over this rule's sub-rules at the beginning of the context's text,
 /// with a total match length of zero and no matches accumulated yet.
 /// </summary>
 /// <param name="context">The context to match against.</param>
 /// <returns>Whatever <c>RecurseMatch</c> produces for the initial state.</returns>
 protected override IEnumerable <RuleMatch> OnMatch(ExplorerContext context)
 {
     var noMatchesYet = ImmutableList <(TaggedRule rule, RuleMatch match)> .Empty;

     return RecurseMatch(_rules, context.ContextID, context, context.Text, 0, noMatchesYet);
 }
Exemplo n.º 20
0
 /// <summary>
 /// Applies the common configuration followed by the column-specific configuration to the scope.
 /// </summary>
 /// <param name="scope">The scope to configure.</param>
 /// <param name="context">The exploration context; must describe exactly one column.</param>
 /// <exception cref="InvalidOperationException">The context does not contain exactly one column.</exception>
 protected override void Configure(ExplorationScope scope, ExplorerContext context)
 {
     if (context.Columns.Length == 1)
     {
         CommonConfiguration(scope);
         ColumnConfiguration(scope, context);
         return;
     }

     throw new InvalidOperationException(
               $"{nameof(TypeBasedScopeBuilder)} expects a single-column context, got {context.Columns.Length} columns.");
 }
Exemplo n.º 21
0
        /// <summary>
        /// Registers the correlation component and the correlated sample generator as publishers on the scope.
        /// The correlation component's projections are built from per-column metadata, which combines each
        /// column with its column info, its entry in <c>SingleColumnPublishers</c> and its index.
        /// </summary>
        /// <param name="scope">The scope to configure.</param>
        /// <param name="context">The exploration context supplying the columns and column infos.</param>
        protected override void Configure(ExplorationScope scope, ExplorerContext context)
        {
            // Deferred sequence: it is only enumerated when the initialiser below runs.
            var columnMetadata = context.Columns
                                 .Zip2(context.ColumnInfos, SingleColumnPublishers)
                                 .Select((zipped, index) => new SingleColumnMetadata(
                                             zipped.Item1,
                                             index,
                                             zipped.Item2,
                                             zipped.Item3));

            scope.AddPublisher <ColumnCorrelationComponent>(
                initialise: component =>
                    component.Projections = BuildProjections(columnMetadata).ToImmutableArray());

            scope.AddPublisher <CorrelatedSampleGenerator>();
        }
Exemplo n.º 22
0
        /// <summary>
        /// Delegates matching to the referenced rule and re-wraps each of its matches as a match of this
        /// rule, with the same text and an output computed lazily from the inner match.
        /// </summary>
        /// <param name="context">The context to match against.</param>
        /// <returns>A deferred sequence of wrapped matches.</returns>
        protected override IEnumerable <RuleMatch> OnMatch(ExplorerContext context)
        {
            return _referencedRule
                   .Match(context)
                   .Select(inner => new RuleMatch(
                               this,
                               inner.Text,
                               () => RuleOutput.ComputeOutput(
                                   inner.Text,
                                   new Lazy <object?>(() => inner.ComputeOutput()))));
        }
Exemplo n.º 23
0
        /// <summary>
        /// Configures a column exploration within a given scope: the connection and context are registered
        /// on a new <c>ExplorationConfig</c> before the caller's component configuration is composed.
        /// </summary>
        /// <param name="scope">The scoped container to use for object resolution.</param>
        /// <param name="conn">A DConnection configured for the Api backend.</param>
        /// <param name="ctx">An <see cref="ExplorerContext" /> defining the exploration parameters.</param>
        /// <param name="componentConfiguration">
        /// An action to add and configure the components to use in this exploration.
        /// </param>
        /// <returns>A new ColumnExploration object for the context's column.</returns>
        public static ColumnExploration ExploreColumn(
            INestedContainer scope,
            DConnection conn,
            ExplorerContext ctx,
            Action <ExplorationConfig> componentConfiguration)
        {
            var explorationConfig = new ExplorationConfig(scope);

            // Register the connection and context first, then let the caller compose its components.
            explorationConfig.UseConnection(conn);
            explorationConfig.UseContext(ctx);
            explorationConfig.Compose(componentConfiguration);

            var exploredColumn = ctx.Column;

            return new ColumnExploration(explorationConfig, scope, exploredColumn);
        }
Exemplo n.º 24
0
        // The switch expression below has no natural type (its arms are method groups), so the local
        // must be declared as Action<ExplorationScope> explicitly rather than with var.
#pragma warning disable IDE0007 // Use var instead of explicit type
        /// <summary>
        /// Picks the exploration routine matching the context's column type and applies it to the scope.
        /// </summary>
        /// <param name="scope">The scope to configure.</param>
        /// <param name="context">The exploration context whose <c>ColumnInfo.Type</c> selects the routine.</param>
        /// <exception cref="InvalidOperationException">The column type has no exploration routine.</exception>
        private static void ColumnConfiguration(ExplorationScope scope, ExplorerContext context)
        {
            Action <ExplorationScope> explore = context.ColumnInfo.Type switch
            {
                // Numeric types
                DValueType.Integer => NumericExploration,
                DValueType.Real => NumericExploration,

                // Date and time types
                DValueType.Date => DatetimeExploration,
                DValueType.Datetime => DatetimeExploration,
                DValueType.Timestamp => DatetimeExploration,

                // Remaining supported types
                DValueType.Text => TextExploration,
                DValueType.Bool => BoolExploration,

                _ => throw new InvalidOperationException($"Cannot explore column type {context.ColumnInfo.Type}."),
            };

            explore(scope);
        }
Exemplo n.º 25
0
        /// <summary>
        /// Recursively applies the repeated rule. For each non-empty match of the rule at the current
        /// position, first yields the matches obtained by repeating further (when the next iteration is
        /// still below the maximum), then yields the match that ends at the current repetition (when the
        /// current iteration count is in range).
        /// </summary>
        /// <param name="masterContextID">Context ID of the whole sequence; not used by the logic here.</param>
        /// <param name="context">The context positioned where the next repetition should match.</param>
        /// <param name="originalText">The text the overall match started from.</param>
        /// <param name="totalMatchLength">Length matched by the previous repetitions.</param>
        /// <param name="iteration">The repetition count this call represents.</param>
        /// <param name="childrenMatches">The matches of the previous repetitions.</param>
        /// <returns>A deferred sequence of complete matches.</returns>
        private IEnumerable <RuleMatch> RecurseMatch(
            //  Used only for debugging purposes, to hook on the context ID of the entire sequence
            int masterContextID,
            ExplorerContext context,
            SubString originalText,
            int totalMatchLength,
            int iteration,
            ImmutableList <RuleMatch> childrenMatches)
        {
            foreach (var match in context.InvokeRule(_rule))
            {
                // Only non-empty matches are considered.
                if (match.Text.Length == 0)
                {
                    continue;
                }

                var nextIteration      = iteration + 1;
                var accumulatedLength  = totalMatchLength + match.LengthWithInterleaves;
                var accumulatedMatches = childrenMatches.Add(match);

                if (IsRepeatCountBelowMaximum(nextIteration))
                {
                    //  Recurse to next iteration, starting right after the current match
                    var deeperMatches = RecurseMatch(
                        masterContextID,
                        context.MoveForward(match),
                        originalText,
                        accumulatedLength,
                        nextIteration,
                        accumulatedMatches);

                    foreach (var deeper in deeperMatches)
                    {
                        yield return deeper;
                    }
                }

                //  Matches are returned in decreasing order of text length, so the "current" one goes last
                if (IsRepeatCountInRange(iteration))
                {
                    var matchText = originalText.Take(accumulatedLength);

                    yield return new RuleMatch(
                        this,
                        matchText,
                        () => ComputeOutput(matchText, accumulatedMatches));
                }
            }
        }
Exemplo n.º 26
0
        /// <summary>
        /// Matches the first character of the context's text, whatever it is.
        /// </summary>
        /// <param name="context">The context whose text is examined.</param>
        /// <returns>
        /// A single one-character match, or <c>RuleMatch.EmptyMatch</c> when the text is empty.
        /// </returns>
        protected override IEnumerable <RuleMatch> OnMatch(ExplorerContext context)
        {
            var text = context.Text;

            if (text.Length != 0)
            {
                var firstChar = text.Take(1);

                return new[]
                {
                    new RuleMatch(
                        this,
                        firstChar,
                        () => RuleOutput.ComputeOutput(firstChar, new Lazy <object?>(firstChar))),
                };
            }

            return RuleMatch.EmptyMatch;
        }
Exemplo n.º 27
0
        /// <summary>
        /// Matches the literal at the start of the context's text, comparing characters with the comparer
        /// returned by <c>GetCharComparer</c>.
        /// </summary>
        /// <param name="context">The context whose text is examined.</param>
        /// <returns>
        /// A single match covering the literal's length, or <c>RuleMatch.EmptyMatch</c> when the text has no
        /// content, is shorter than the literal, or does not start with it.
        /// </returns>
        protected override IEnumerable <RuleMatch> OnMatch(ExplorerContext context)
        {
            var text = context.Text;

            // The length check comes first so the prefix is only taken when the text is long enough.
            if (!text.HasContent || text.Length < _literal.Length)
            {
                return RuleMatch.EmptyMatch;
            }

            var startsWithLiteral = text.Take(_literal.Length).SequenceEqual(_literal, GetCharComparer());

            if (!startsWithLiteral)
            {
                return RuleMatch.EmptyMatch;
            }

            var matchText = text.Take(_literal.Length);

            return new[]
            {
                new RuleMatch(
                    this,
                    matchText,
                    () => RuleOutput.ComputeOutput(
                        matchText,
                        new Lazy <object?>(() => matchText))),
            };
        }
Exemplo n.º 28
0
        /// <summary>
        /// Releases the resources held by this instance. Does nothing if it has already been disposed.
        /// </summary>
        /// <param name="disposing">
        /// When <c>true</c>, managed resources are released: an open object-context connection is closed and
        /// the data context is disposed and cleared. When <c>false</c>, only the disposed flag is set.
        /// </param>
        public virtual void Dispose(bool disposing)
        {
            if (_disposed)
            {
                return;
            }

            if (disposing)
            {
                // Managed resources only.
                try
                {
                    if (_objectContext?.Connection.State == ConnectionState.Open)
                    {
                        _objectContext.Connection.Close();
                    }
                }
                catch (ObjectDisposedException)
                {
                    // Nothing to do: the object context has already been disposed.
                }

                _dataContext?.Dispose();
                _dataContext = null;
            }

            // No unmanaged resources are released here; just record that disposal has happened.
            _disposed = true;
        }
Exemplo n.º 29
0
 /// <summary>
 /// Creates the component, storing the supplied exploration context and connection.
 /// </summary>
 /// <param name="conn">The connection to store.</param>
 /// <param name="ctx">The exploration context to store.</param>
 public DistinctValuesComponent(DConnection conn, ExplorerContext ctx)
     => (this.ctx, this.conn) = (ctx, conn);
Exemplo n.º 30
0
 /// <summary>
 /// Creates the component, storing the supplied exploration context and connection.
 /// </summary>
 /// <param name="conn">The connection to store.</param>
 /// <param name="ctx">The exploration context to store.</param>
 public IsolatorCheckComponent(DConnection conn, ExplorerContext ctx)
     => (this.ctx, this.conn) = (ctx, conn);