Exemple #1
0
        /// <summary>
        /// Collects substring statistics for the texts of the column described by <paramref name="ctx"/>.
        /// Positions are not queried one by one: each query covers a batch of
        /// <paramref name="substringQueryColumnCount"/> consecutive positions.
        /// </summary>
        /// <remarks>
        /// For every requested length the scan starts at position 0 and advances by one batch at a time.
        /// It stops for that length as soon as a batch yields no row that has a value.
        /// </remarks>
        /// <param name="conn">Connection used to execute the substring queries.</param>
        /// <param name="ctx">Supplies the table and column that the queries target.</param>
        /// <param name="substringQueryColumnCount">Number of positions covered by a single query.</param>
        /// <param name="substringLengths">Substring lengths to explore, processed in the given order.</param>
        /// <returns>The accumulated substring data for all explored lengths and positions.</returns>
        private static async Task <SubstringsData> ExploreSubstrings(
            DConnection conn,
            ExplorerContext ctx,
            int substringQueryColumnCount,
            params int[] substringLengths)
        {
            var collected = new SubstringsData();

            foreach (var substringLength in substringLengths)
            {
                var batchStart = 0;
                bool batchHadValues;

                // The first batch is always queried; later ones only while the previous batch produced values.
                do
                {
                    var batchQuery = new TextColumnSubstring(
                        ctx.Table,
                        ctx.Column,
                        batchStart,
                        substringLength,
                        substringQueryColumnCount);
                    var batchResult = await conn.Exec(batchQuery);

                    batchHadValues = false;
                    foreach (var row in batchResult.Rows)
                    {
                        if (!row.HasValue)
                        {
                            continue;
                        }

                        batchHadValues = true;

                        // row.Index is relative to the batch, so shift it by the batch start.
                        collected.Add(batchStart + row.Index, row.Value, row.Count);
                    }

                    batchStart += substringQueryColumnCount;
                }
                while (batchHadValues);
            }

            return collected;
        }
        /// <summary>
        /// Builds a list of (Length, Count) pairs by running single-character substring queries in batches
        /// over positions 0 through <c>options.TextColumnMaxExplorationLength</c>.
        /// </summary>
        /// <remarks>
        /// Rows of each batch are visited in ascending <c>Index</c> order. Whenever a row's count exceeds the
        /// count of the previously visited row, the increase is recorded against the row's absolute position.
        /// Scanning ends early once a non-empty batch reports only the count seen last.
        /// NOTE(review): the counts are presumably cumulative per position, so each increase would be the
        /// number of values with that isolating length; confirm against the TextColumnSubstring query.
        /// </remarks>
        /// <returns>A <c>Result</c> wrapping the collected (Length, Count) pairs.</returns>
        internal async Task <Result> ComputeIsolatorLengthDistribution()
        {
            var lengthCounts  = new List <(long Length, long Count)>();
            var previousCount = 0L;
            var batchStart    = 0;

            while (batchStart <= options.TextColumnMaxExplorationLength)
            {
                // Never request columns beyond the maximum exploration length.
                var remainingColumns = options.TextColumnMaxExplorationLength + 1 - batchStart;
                var batchSize        = Math.Min(options.SubstringQueryColumnCount, remainingColumns);

                var batchQuery  = new TextColumnSubstring(batchStart, 1, batchSize, 0);
                var batchResult = await Context.Exec(batchQuery);
                var orderedRows = batchResult.Rows.OrderBy(r => r.Index).ToList();

                // An empty batch does not stop the scan; only a non-empty batch with no change does.
                var countsUnchanged = orderedRows.Count > 0
                    && orderedRows.TrueForAll(r => r.Count == previousCount);
                if (countsUnchanged)
                {
                    break;
                }

                foreach (var row in orderedRows)
                {
                    if (previousCount < row.Count)
                    {
                        lengthCounts.Add((batchStart + row.Index, row.Count - previousCount));
                    }

                    previousCount = row.Count;
                }

                batchStart += batchSize;
            }

            return new Result(lengthCounts);
        }