/// <summary>
/// Finds common substrings for each position in the texts of the column described by
/// <paramref name="ctx"/>. Positions are queried in batches: each query covers
/// <paramref name="substringQueryColumnCount"/> consecutive positions, starting at position 0.
/// For every requested length the scan continues until a batch yields no row with a value.
/// </summary>
/// <param name="conn">Connection used to execute the substring queries.</param>
/// <param name="ctx">Context supplying the table and column to explore.</param>
/// <param name="substringQueryColumnCount">Number of positions covered by a single query.</param>
/// <param name="substringLengths">Substring lengths to explore, processed in the given order.</param>
/// <returns>The substring data accumulated over all lengths and positions.</returns>
private static async Task<SubstringsData> ExploreSubstrings(
    DConnection conn,
    ExplorerContext ctx,
    int substringQueryColumnCount,
    params int[] substringLengths)
{
    var collected = new SubstringsData();

    foreach (var substringLength in substringLengths)
    {
        var offset = 0;
        bool batchHadValues;

        // At least one batch is always queried per length; scanning stops after the
        // first batch in which no row carries a value.
        do
        {
            var batchQuery = new TextColumnSubstring(
                ctx.Table,
                ctx.Column,
                offset,
                substringLength,
                substringQueryColumnCount);
            var batch = await conn.Exec(batchQuery);

            batchHadValues = false;
            foreach (var entry in batch.Rows)
            {
                if (entry.HasValue)
                {
                    batchHadValues = true;

                    // entry.Index is relative to the batch; adding the batch offset
                    // gives the position that is recorded.
                    collected.Add(offset + entry.Index, entry.Value, entry.Count);
                }
            }

            offset += substringQueryColumnCount;
        }
        while (batchHadValues);
    }

    return collected;
}
/// <summary>
/// Builds a list of (length, count) pairs by querying single-character substrings in batches
/// of positions, from position 0 up to <c>options.TextColumnMaxExplorationLength</c> inclusive.
/// Whenever a row's count exceeds the count of the previously processed row, the increase is
/// recorded against that row's absolute position.
/// </summary>
/// <remarks>
/// NOTE(review): the row count presumably grows with position as shorter values run out of
/// characters, so each increase would correspond to values of that length - confirm against
/// the <c>TextColumnSubstring</c> query semantics.
/// </remarks>
/// <returns>A <c>Result</c> wrapping the collected (length, count) pairs.</returns>
internal async Task<Result> ComputeIsolatorLengthDistribution()
{
    var lengthCounts = new List<(long Length, long Count)>();
    var previousCount = 0L;
    var offset = 0;

    while (offset <= options.TextColumnMaxExplorationLength)
    {
        // Never request positions beyond the configured maximum exploration length.
        var batchSize = Math.Min(
            options.SubstringQueryColumnCount,
            options.TextColumnMaxExplorationLength + 1 - offset);

        var response = await Context.Exec(new TextColumnSubstring(offset, 1, batchSize, 0));
        var orderedRows = response.Rows.OrderBy(r => r.Index).ToList();

        // Stop once a non-empty batch shows no change in count at any of its positions.
        var countsUnchanged = orderedRows.Count > 0
            && orderedRows.All(r => r.Count == previousCount);
        if (countsUnchanged)
        {
            break;
        }

        foreach (var entry in orderedRows)
        {
            if (entry.Count > previousCount)
            {
                lengthCounts.Add((Length: offset + entry.Index, Count: entry.Count - previousCount));
            }

            // Updated for every row, including rows whose count did not increase.
            previousCount = entry.Count;
        }

        offset += batchSize;
    }

    return new Result(lengthCounts);
}