/// <summary>
/// Runs a <c>TextColumnTrim</c> query (both ends, using <c>Constants.EmailAddressChars</c>) against the
/// context's table and column, and reports whether every returned row is either null or an
/// unsuppressed row whose value is exactly "@".
/// </summary>
/// <param name="conn">The connection used to execute the query.</param>
/// <param name="ctx">The context providing the table and column to inspect.</param>
/// <returns>True when all rows satisfy the check; false as soon as one row does not.</returns>
private static async Task<bool> CheckIsEmail(DConnection conn, ExplorerContext ctx)
{
    var trimQuery = new TextColumnTrim(
        ctx.Table,
        ctx.Column,
        TextColumnTrimType.Both,
        Constants.EmailAddressChars);
    var trimmed = await conn.Exec(trimQuery);

    foreach (var row in trimmed.Rows)
    {
        if (row.IsNull)
        {
            // Null rows never disqualify the column.
            continue;
        }

        var isBareAtSign = !row.IsSuppressed && row.Value == "@";
        if (!isBareAtSign)
        {
            return false;
        }
    }

    return true;
}
/// <summary>
/// Matches the first character of the context text when it lies in the inclusive range
/// [<c>First</c>, <c>Last</c>]. When <c>IsCaseSensitive</c> is false, the character and both bounds
/// are converted with <see cref="char.ToUpperInvariant(char)" /> before comparing.
/// </summary>
/// <param name="context">The context whose text is inspected.</param>
/// <returns>A single one-character match, or <c>RuleMatch.EmptyMatch</c> when nothing matches.</returns>
protected override IEnumerable<RuleMatch> OnMatch(ExplorerContext context)
{
    var input = context.Text;
    if (!input.HasContent)
    {
        return RuleMatch.EmptyMatch;
    }

    var candidate = input.First();
    var lowerBound = First;
    var upperBound = Last;
    if (IsCaseSensitive == false)
    {
        candidate = char.ToUpperInvariant(candidate);
        lowerBound = char.ToUpperInvariant(lowerBound);
        upperBound = char.ToUpperInvariant(upperBound);
    }

    if (candidate < lowerBound || candidate > upperBound)
    {
        return RuleMatch.EmptyMatch;
    }

    var matchText = input.Take(1);
    var match = new RuleMatch(
        this,
        matchText,
        () => RuleOutput.ComputeOutput(matchText, new Lazy<object?>(matchText)));
    return new[] { match };
}
/// <summary>
/// Drills down into successive column combinations, combining the results.
/// <para>
/// For example: We have columns {A, B, C, D}
/// First, this will get counts for {A}, {B}, {C}, {D}.
/// then: {A, B}, {A, C}, {A, D} - ONLY if {A} returned no suppressed columns
/// then: {B, A}, {B, C}, {B, D} - ONLY if {B} returned no suppressed columns
/// etc...
/// then: {A, B, C, D} - ONLY if {A, B, C} returned no suppressed columns
/// then: {A, B, D, C} - ONLY if {A, B, D} returned no suppressed columns
/// etc.
/// </para>
/// <para>
/// Thus, it 'searches' the data sets, grouping by ever increasing column combinations, until it reaches a
/// 'dead-end' where the combination is suppressed.
/// </para>
/// </summary>
/// <param name="context">An <see cref="ExplorerContext" /> containing the query execution method.</param>
/// <param name="projections">
/// A list of <see cref="ColumnProjection" />s defining how to segment the columns into buckets.
/// </param>
/// <param name="maxLevel">
/// The maximum number of columns to include in a subgrouping, or null for all columns.
/// Values below 1 result in only the root level being returned.
/// </param>
/// <returns>A Task that resolves to a list of query result rows.</returns>
public static async Task<IEnumerable<MultiColumnCounts.Result>> DrillDown(
    ExplorerContext context,
    IEnumerable<ColumnProjection> projections,
    int? maxLevel = null)
{
    // Count once: the sequence may be lazy, and counting it repeatedly re-enumerates it.
    var projectionCount = projections.Count();
    var numLevels = Math.Min(maxLevel ?? projectionCount, projectionCount);

    // The root level is always added, so reserve at least one slot. This also keeps the
    // capacity non-negative when a caller passes a negative maxLevel.
    var allLevels = new List<IEnumerable<MultiColumnCounts.Result>>(Math.Max(numLevels, 1));

    var rootLevel = await context.Exec(new MultiColumnCountsPartial(projections));
    allLevels.Add(rootLevel.Rows);

    // A plain counted loop simply does not run when numLevels <= 1, whereas
    // Enumerable.Range(1, numLevels - 1) throws for numLevels == 0.
    for (var depth = 1; depth < numLevels; depth++)
    {
        var currentLevel = allLevels[depth - 1];

        // Materialize once so the emptiness check and the stored level share one enumeration.
        var nextLevel = (await DrillDownNextLevel(context, projections, currentLevel, depth)).ToList();
        if (nextLevel.Count == 0)
        {
            // Dead end: nothing further to drill into.
            break;
        }

        allLevels.Add(nextLevel);
    }

    return allLevels.Flatten();
}
/// <summary>
/// Collects substrings per position for the texts of the context's column.
/// For every requested substring length, positions are queried in batches of
/// <paramref name="substringQueryColumnCount" /> per query, advancing until a batch
/// returns no row with a value.
/// </summary>
/// <param name="conn">The connection used to execute the queries.</param>
/// <param name="ctx">The context providing the table and column.</param>
/// <param name="substringQueryColumnCount">Number of positions covered by a single query.</param>
/// <param name="substringLengths">The substring lengths to explore.</param>
/// <returns>The accumulated substring data.</returns>
private static async Task<SubstringsData> ExploreSubstrings(
    DConnection conn,
    ExplorerContext ctx,
    int substringQueryColumnCount,
    params int[] substringLengths)
{
    var collected = new SubstringsData();

    foreach (var substringLength in substringLengths)
    {
        var batchStart = 0;
        bool batchHadValues;
        do
        {
            var batchQuery = new TextColumnSubstring(
                ctx.Table, ctx.Column, batchStart, substringLength, substringQueryColumnCount);
            var batch = await conn.Exec(batchQuery);

            batchHadValues = false;
            foreach (var row in batch.Rows)
            {
                if (!row.HasValue)
                {
                    continue;
                }

                batchHadValues = true;

                // row.Index is relative to the batch, so offset it by the batch start.
                collected.Add(batchStart + row.Index, row.Value, row.Count);
            }

            batchStart += substringQueryColumnCount;
        }
        while (batchHadValues);
    }

    return collected;
}
/// <summary>
/// Creates the component, storing the given distribution provider and context.
/// </summary>
/// <param name="distributionProvider">Provider of the datetime distribution result.</param>
/// <param name="context">The exploration context.</param>
public DatetimeGeneratorComponent(
    ResultProvider<DatetimeDistribution> distributionProvider,
    ExplorerContext context)
    => (this.distributionProvider, this.context) = (distributionProvider, context);
/// <summary>
/// Produces a new <see cref="CheckedContext" /> holding the distinct union of this context's and
/// <paramref name="other" />'s columns and column infos.
/// </summary>
/// <param name="other">The context to merge with; must be a <see cref="CheckedContext" />.</param>
/// <returns>The merged context.</returns>
/// <exception cref="ArgumentException">
/// Thrown when <paramref name="other" /> is of a different concrete type, or differs in connection
/// (by reference), data source, table, or samples-to-publish.
/// </exception>
public ExplorerContext Merge(ExplorerContext other)
{
    void RejectIf(bool incompatible, string message)
    {
        if (incompatible)
        {
            throw new ArgumentException(message);
        }
    }

    if (!(other is CheckedContext checkedOther))
    {
        throw new ArgumentException("Cannot merge two contexts of different concrete type.");
    }

    // Checks run in this order; the first failing one determines the exception message.
    RejectIf(
        !ReferenceEquals(Connection, checkedOther.Connection),
        "Cannot merge two contexts with different Connections.");
    RejectIf(
        !string.Equals(DataSource, other.DataSource, StringComparison.Ordinal),
        "Cannot merge two contexts with different DataSources.");
    RejectIf(
        !string.Equals(Table, other.Table, StringComparison.Ordinal),
        "Cannot merge two contexts with different Tables.");
    RejectIf(
        SamplesToPublish != other.SamplesToPublish,
        "Cannot merge two contexts with different SamplesToPublish.");

    var mergedColumns = Columns.AddRange(other.Columns).Distinct();
    var mergedColumnInfos = ColumnInfos.AddRange(other.ColumnInfos).Distinct();

    return new CheckedContext(
        Connection,
        DataSource,
        Table,
        mergedColumns,
        mergedColumnInfos,
        SamplesToPublish);
}
/// <summary>
/// Creates the generator, storing the given context and distribution provider.
/// </summary>
/// <param name="ctx">The exploration context.</param>
/// <param name="distributionProvider">Provider of the numeric distribution result.</param>
public NumericSampleGenerator(
    ExplorerContext ctx,
    ResultProvider<NumericDistribution> distributionProvider)
    => (this.ctx, this.distributionProvider) = (ctx, distributionProvider);
/// <summary>
/// Yields a match for every match of the primary rule, except those for which the excluded rule,
/// invoked on a sub-context limited to the primary match's length, produces a match of exactly
/// that same length.
/// </summary>
/// <param name="context">The context to match against.</param>
/// <returns>A lazily evaluated sequence of the surviving matches.</returns>
protected override IEnumerable<RuleMatch> OnMatch(ExplorerContext context)
{
    foreach (var candidate in context.InvokeRule(_primary))
    {
        var candidateText = candidate.Text;
        var exclusionMatches = context
            .SubContext(candidateText.Length)
            .InvokeRule(_excluded);

        // Only an exclusion covering the whole primary text vetoes the candidate.
        if (exclusionMatches.Any(excluded => excluded.Text.Length == candidateText.Length))
        {
            continue;
        }

        yield return new RuleMatch(
            this,
            candidateText,
            () => RuleOutput.ComputeOutput(
                candidateText,
                new Lazy<object?>(() => candidate.ComputeOutput())));
    }
}
/// <summary>
/// Invokes each of the tagged rules in turn and re-wraps every match they produce as a match of
/// this rule. When <c>_rules.DoAllHaveNames</c> is true, the output value is wrapped via
/// <c>MakeMap</c> with the rule's tag; otherwise the inner output is passed through unchanged.
/// </summary>
/// <param name="context">The context to match against.</param>
/// <returns>A lazily evaluated sequence of matches, grouped in rule order.</returns>
protected override IEnumerable<RuleMatch> OnMatch(ExplorerContext context)
{
    foreach (var tagged in _rules)
    {
        foreach (var inner in context.InvokeRule(tagged.Rule))
        {
            // Pick the value factory first; the surrounding RuleMatch is the same in both cases.
            Func<object?> produceValue;
            if (_rules.DoAllHaveNames)
            {
                produceValue = () => MakeMap(tagged.Tag, inner.ComputeOutput());
            }
            else
            {
                produceValue = () => inner.ComputeOutput();
            }

            yield return new RuleMatch(
                this,
                inner.Text,
                () => RuleOutput.ComputeOutput(inner.Text, new Lazy<object?>(produceValue)));
        }
    }
}
/// <summary>
/// Yields all matches produced by <c>RecurseMatch</c> starting at iteration 1, followed by an
/// empty-text match when no minimum is set or the minimum is zero.
/// </summary>
/// <param name="context">The context to match against.</param>
/// <returns>A lazily evaluated sequence of matches.</returns>
protected override IEnumerable<RuleMatch> OnMatch(ExplorerContext context)
{
    var repeatedMatches = RecurseMatch(
        context.ContextID,
        context,
        context.Text,
        0,
        1,
        ImmutableList<RuleMatch>.Empty);

    foreach (var repeated in repeatedMatches)
    {
        yield return repeated;
    }

    // A positive minimum rules out the zero-repetition match.
    if (_min.HasValue && _min.Value != 0)
    {
        yield break;
    }

    // Matches are returned in decreasing order of text length, so the empty one comes last.
    var emptyText = context.Text.Take(0);
    yield return new RuleMatch(
        this,
        emptyText,
        () => RuleOutput.ComputeOutput(
            emptyText,
            new Lazy<object?>(ImmutableArray<object>.Empty)));
}
/// <summary>
/// Creates a test context by copying the data source, table, column and column type
/// from an existing <see cref="ExplorerContext" />.
/// </summary>
/// <param name="ctx">The context to copy from.</param>
public ExplorerTestContext(ExplorerContext ctx)
    => (DataSource, Table, Column, ColumnType) =
        (ctx.DataSource, ctx.Table, ctx.Column, ctx.ColumnType);
/// <summary>
/// Creates the generator, storing the given correlation provider and context.
/// </summary>
/// <param name="correlationProvider">Provider of the column correlation result.</param>
/// <param name="context">The exploration context.</param>
public CorrelatedSampleGenerator(
    ResultProvider<ColumnCorrelationComponent.Result> correlationProvider,
    ExplorerContext context)
    => (this.context, this.correlationProvider) = (context, correlationProvider);
/// <summary>
/// Creates an <see cref="ExplorationScope" /> over the given container, registers the context
/// with it, and then applies this builder's <c>Configure</c> step.
/// </summary>
/// <param name="scope">The nested container backing the new scope.</param>
/// <param name="context">The context to register and configure for.</param>
/// <returns>The configured exploration scope.</returns>
public ExplorationScope Build(INestedContainer scope, ExplorerContext context)
{
    var built = new ExplorationScope(scope);

    // The context is registered before Configure runs.
    built.UseContext(context);
    Configure(built, context);

    return built;
}
/// <summary>
/// Starts a column exploration inside a fresh nested container obtained from the root container.
/// </summary>
/// <param name="conn">A DConnection configured for the Api backend.</param>
/// <param name="ctx">An <see cref="ExplorerContext" /> defining the exploration parameters.</param>
/// <param name="componentConfiguration">
/// An action that adds and configures the components used in this exploration.
/// </param>
/// <returns>A new ColumnExploration object.</returns>
public ColumnExploration LaunchColumnExploration(
    DConnection conn,
    ExplorerContext ctx,
    Action<ExplorationConfig> componentConfiguration)
{
    // This scope (and all the components resolved within) should live until the end of the Task.
    var explorationContainer = rootContainer.GetNestedContainer();

    return ExploreColumn(explorationContainer, conn, ctx, componentConfiguration);
}
private IEnumerable <RuleMatch> RecurseMatch( IEnumerable <TaggedRule> rules, // Used only for debugging purposes, to hook on the context ID of the entire sequence int masterContextID, ExplorerContext context, SubString originalText, int totalMatchLength, ImmutableList <(TaggedRule rule, RuleMatch match)> matches)
/// <summary>
/// Creates the component, storing its dependencies and initialising the tunable settings
/// to their default values.
/// </summary>
/// <param name="conn">The connection used to execute queries.</param>
/// <param name="ctx">The exploration context.</param>
/// <param name="emailChecker">The email check component this generator consults.</param>
public TextGeneratorComponent(DConnection conn, ExplorerContext ctx, EmailCheckComponent emailChecker)
{
    // Tunable settings start at their defaults.
    SubstringQueryColumnCount = DefaultSubstringQueryColumnCount;
    EmailDomainsCountThreshold = DefaultEmailDomainsCountThreshold;
    GeneratedValuesCount = DefaultGeneratedValuesCount;

    // Injected dependencies.
    (this.conn, this.ctx, this.emailChecker) = (conn, ctx, emailChecker);
}
/// <summary>
/// Creates the component, storing the connection, context and statistics provider.
/// </summary>
/// <param name="conn">The connection used to execute queries.</param>
/// <param name="ctx">The exploration context.</param>
/// <param name="statsResultProvider">Provider of the simple statistics result.</param>
public NumericHistogramComponent(
    DConnection conn,
    ExplorerContext ctx,
    ResultProvider<SimpleStats<double>.Result> statsResultProvider)
    => (this.conn, this.ctx, this.statsResultProvider) = (conn, ctx, statsResultProvider);
/// <summary>
/// Creates the component, storing the connection, context and isolator check provider.
/// </summary>
/// <param name="conn">The connection used to execute queries.</param>
/// <param name="ctx">The exploration context.</param>
/// <param name="isolatorCheck">Provider of the isolator check result.</param>
public TextLengthComponent(
    DConnection conn,
    ExplorerContext ctx,
    ResultProvider<IsolatorCheckComponent.Result> isolatorCheck)
    => (this.conn, this.ctx, this.isolatorCheck) = (conn, ctx, isolatorCheck);
/// <summary>
/// Starts the recursive match over all of this rule's child rules, beginning at the context's
/// text with a total match length of zero and no child matches collected yet.
/// </summary>
/// <param name="context">The context to match against.</param>
/// <returns>The matches produced by <c>RecurseMatch</c>.</returns>
protected override IEnumerable<RuleMatch> OnMatch(ExplorerContext context)
    => RecurseMatch(
        _rules,
        context.ContextID,
        context,
        context.Text,
        0,
        ImmutableList<(TaggedRule rule, RuleMatch match)>.Empty);
/// <summary>
/// Configures the scope for a single-column exploration by applying the common configuration
/// followed by the column-specific configuration.
/// </summary>
/// <param name="scope">The scope to configure.</param>
/// <param name="context">The context; must contain exactly one column.</param>
/// <exception cref="InvalidOperationException">
/// Thrown when the context does not contain exactly one column.
/// </exception>
protected override void Configure(ExplorationScope scope, ExplorerContext context)
{
    var columnCount = context.Columns.Length;
    if (columnCount == 1)
    {
        CommonConfiguration(scope);
        ColumnConfiguration(scope, context);
        return;
    }

    throw new InvalidOperationException(
        $"{nameof(TypeBasedScopeBuilder)} expects a single-column context, got {columnCount} columns.");
}
/// <summary>
/// Configures the scope for a multi-column exploration: builds per-column metadata from the
/// context's columns, column infos and <c>SingleColumnPublishers</c>, then registers the
/// correlation component (initialised with projections built from that metadata) and the
/// correlated sample generator.
/// </summary>
/// <param name="scope">The scope to configure.</param>
/// <param name="context">The context providing the columns and column infos.</param>
protected override void Configure(ExplorationScope scope, ExplorerContext context)
{
    var zipped = context.Columns.Zip2(context.ColumnInfos, SingleColumnPublishers);

    // The element's position in the zipped sequence becomes the metadata index.
    var metadata = zipped.Select(
        (entry, index) => new SingleColumnMetadata(entry.Item1, index, entry.Item2, entry.Item3));

    scope.AddPublisher<ColumnCorrelationComponent>(
        initialise: component => component.Projections = BuildProjections(metadata).ToImmutableArray());

    scope.AddPublisher<CorrelatedSampleGenerator>();
}
/// <summary>
/// Delegates matching to the referenced rule and re-wraps each of its matches as a match of
/// this rule, passing the inner output through lazily.
/// </summary>
/// <param name="context">The context to match against.</param>
/// <returns>A lazily evaluated sequence of wrapped matches.</returns>
protected override IEnumerable<RuleMatch> OnMatch(ExplorerContext context)
{
    return _referencedRule
        .Match(context)
        .Select(inner => new RuleMatch(
            this,
            inner.Text,
            () => RuleOutput.ComputeOutput(
                inner.Text,
                new Lazy<object?>(() => inner.ComputeOutput()))));
}
/// <summary>
/// Sets up a column exploration inside the supplied scope.
/// </summary>
/// <param name="scope">The scoped container to use for object resolution.</param>
/// <param name="conn">A DConnection configured for the Api backend.</param>
/// <param name="ctx">An <see cref="ExplorerContext" /> defining the exploration parameters.</param>
/// <param name="componentConfiguration">
/// An action that adds and configures the components used in this exploration.
/// </param>
/// <returns>A new ColumnExploration object for the context's column.</returns>
public static ColumnExploration ExploreColumn(
    INestedContainer scope,
    DConnection conn,
    ExplorerContext ctx,
    Action<ExplorationConfig> componentConfiguration)
{
    var explorationConfig = new ExplorationConfig(scope);

    // Connection and context are registered before the caller's components are composed.
    explorationConfig.UseConnection(conn);
    explorationConfig.UseContext(ctx);
    explorationConfig.Compose(componentConfiguration);

    return new ColumnExploration(explorationConfig, scope, ctx.Column);
}
// Disabling this because the delegate needs an explicit Action<ExplorationScope> type;
// the compiler can't infer it from the method groups.
#pragma warning disable IDE0007 // Use var instead of explicit type
/// <summary>
/// Selects the exploration routine matching the type of the context's column and applies it
/// to the scope.
/// </summary>
/// <param name="scope">The scope to configure.</param>
/// <param name="context">The context whose column type drives the selection.</param>
/// <exception cref="InvalidOperationException">Thrown for column types with no exploration routine.</exception>
private static void ColumnConfiguration(ExplorationScope scope, ExplorerContext context)
{
    var columnType = context.ColumnInfo.Type;

    Action<ExplorationScope> configure;
    switch (columnType)
    {
        case DValueType.Integer:
        case DValueType.Real:
            configure = NumericExploration;
            break;

        case DValueType.Text:
            configure = TextExploration;
            break;

        case DValueType.Timestamp:
        case DValueType.Date:
        case DValueType.Datetime:
            configure = DatetimeExploration;
            break;

        case DValueType.Bool:
            configure = BoolExploration;
            break;

        default:
            throw new InvalidOperationException($"Cannot explore column type {columnType}.");
    }

    configure(scope);
}
/// <summary>
/// Recursively applies the repeated rule. For each non-empty match at the current position it
/// first yields the longer matches found by recursing past that match (while the repeat count
/// stays below the maximum), and then, if the current iteration count is within range, the
/// match that ends here.
/// </summary>
/// <param name="masterContextID">Context ID of the entire sequence; passed along for debugging only.</param>
/// <param name="context">The context positioned where the next repetition should match.</param>
/// <param name="originalText">The text the whole repetition started from.</param>
/// <param name="totalMatchLength">Length matched by the previous repetitions.</param>
/// <param name="iteration">The number of the repetition being attempted.</param>
/// <param name="childrenMatches">The matches of the previous repetitions.</param>
/// <returns>A lazily evaluated sequence of complete matches.</returns>
private IEnumerable<RuleMatch> RecurseMatch(
    // Used only for debugging purposes, to hook on the context ID of the entire sequence
    int masterContextID,
    ExplorerContext context,
    SubString originalText,
    int totalMatchLength,
    int iteration,
    ImmutableList<RuleMatch> childrenMatches)
{
    foreach (var step in context.InvokeRule(_rule))
    {
        // Empty matches are ignored here.
        if (step.Text.Length == 0)
        {
            continue;
        }

        var lengthSoFar = totalMatchLength + step.LengthWithInterleaves;
        var matchedSoFar = childrenMatches.Add(step);
        var nextIteration = iteration + 1;

        if (IsRepeatCountBelowMaximum(nextIteration))
        {
            // Recurse to the next iteration, starting right after this step.
            var advanced = context.MoveForward(step);
            var longerMatches = RecurseMatch(
                masterContextID,
                advanced,
                originalText,
                lengthSoFar,
                nextIteration,
                matchedSoFar);

            foreach (var longer in longerMatches)
            {
                yield return longer;
            }
        }

        // Matches are returned in decreasing order of text length, so the "current" one goes last.
        if (!IsRepeatCountInRange(iteration))
        {
            continue;
        }

        var matchText = originalText.Take(lengthSoFar);
        yield return new RuleMatch(
            this,
            matchText,
            () => ComputeOutput(matchText, matchedSoFar));
    }
}
/// <summary>
/// Matches the first character of the context text, whatever it is.
/// </summary>
/// <param name="context">The context whose text is inspected.</param>
/// <returns>
/// A single one-character match, or <c>RuleMatch.EmptyMatch</c> when the text has length zero.
/// </returns>
protected override IEnumerable<RuleMatch> OnMatch(ExplorerContext context)
{
    var input = context.Text;
    if (input.Length != 0)
    {
        var firstChar = input.Take(1);
        return new[]
        {
            new RuleMatch(
                this,
                firstChar,
                () => RuleOutput.ComputeOutput(firstChar, new Lazy<object?>(firstChar))),
        };
    }

    return RuleMatch.EmptyMatch;
}
/// <summary>
/// Matches the literal at the start of the context text, comparing characters with the comparer
/// returned by <c>GetCharComparer</c>.
/// </summary>
/// <param name="context">The context whose text is inspected.</param>
/// <returns>
/// A single match covering the literal's length, or <c>RuleMatch.EmptyMatch</c> when the text has
/// no content, is shorter than the literal, or does not start with it.
/// </returns>
protected override IEnumerable<RuleMatch> OnMatch(ExplorerContext context)
{
    var input = context.Text;

    // Cheap rejections first: nothing to match, or not enough text for the literal.
    if (!input.HasContent || input.Length < _literal.Length)
    {
        return RuleMatch.EmptyMatch;
    }

    if (!input.Take(_literal.Length).SequenceEqual(_literal, GetCharComparer()))
    {
        return RuleMatch.EmptyMatch;
    }

    var matchText = input.Take(_literal.Length);
    return new[]
    {
        new RuleMatch(
            this,
            matchText,
            () => RuleOutput.ComputeOutput(
                matchText,
                new Lazy<object?>(() => matchText))),
    };
}
/// <summary>
/// Releases the resources held by this instance. Does nothing if it has already been disposed.
/// </summary>
/// <param name="disposing">
/// When true, managed resources are released: an open object-context connection is closed and
/// the data context is disposed and cleared.
/// </param>
public virtual void Dispose(bool disposing)
{
    if (!_disposed)
    {
        if (disposing)
        {
            // Free other managed objects that implement IDisposable only.
            try
            {
                if (_objectContext != null)
                {
                    if (_objectContext.Connection.State == ConnectionState.Open)
                    {
                        _objectContext.Connection.Close();
                    }
                }
            }
            catch (ObjectDisposedException)
            {
                // Nothing to do: the object context has already been disposed.
            }

            if (_dataContext != null)
            {
                _dataContext.Dispose();
                _dataContext = null;
            }
        }

        // No unmanaged objects are released here; just record that disposal has happened.
        _disposed = true;
    }
}
/// <summary>
/// Creates the component, storing the given connection and context.
/// </summary>
/// <param name="conn">The connection used to execute queries.</param>
/// <param name="ctx">The exploration context.</param>
public DistinctValuesComponent(DConnection conn, ExplorerContext ctx)
    => (this.conn, this.ctx) = (conn, ctx);
/// <summary>
/// Creates the component, storing the given connection and context.
/// </summary>
/// <param name="conn">The connection used to execute queries.</param>
/// <param name="ctx">The exploration context.</param>
public IsolatorCheckComponent(DConnection conn, ExplorerContext ctx)
    => (this.conn, this.ctx) = (conn, ctx);