Example #1
0
        /// <summary>
        ///  Counts the rows available from a source. Seekable tables report their Count directly,
        ///  concatenated tables are counted part-by-part in parallel and summed, and any other
        ///  table is paged through with Next until a batch of zero rows comes back.
        /// </summary>
        /// <param name="source">Table to count</param>
        /// <param name="desiredCount">Requested batch size; the larger of this and XTableExtensions.DefaultBatchSize is passed to Next</param>
        /// <param name="cancellationToken">Token handed to every Next call (and to recursive counts of parts)</param>
        /// <returns>Total row count found</returns>
        private static long CountSource(IXTable source, int desiredCount, CancellationToken cancellationToken)
        {
            // Seekable tables already know how many rows they hold
            ISeekableXTable seekable = source as ISeekableXTable;
            if (seekable != null)
            {
                return seekable.Count;
            }

            // Concatenated tables: count each part independently, then add the totals
            ConcatenatedTable concatenated = source as ConcatenatedTable;
            if (concatenated != null)
            {
                List<IXTable> parts = concatenated.Sources.ToList();
                long[] partTotals = new long[parts.Count];

                // Each iteration writes only its own slot, so the array needs no locking
                Parallel.For(0, parts.Count, (index) =>
                {
                    partTotals[index] = CountSource(parts[index], desiredCount, cancellationToken);
                });

                return partTotals.Sum();
            }

            // Everything else: keep requesting batches until one comes back empty
            long total = 0;
            int rowsInBatch;

            do
            {
                rowsInBatch = source.Next(Math.Max(desiredCount, XTableExtensions.DefaultBatchSize), cancellationToken);
                total += rowsInBatch;
            }
            while (rowsInBatch != 0);

            return total;
        }
Example #2
0
        /// <summary>
        ///  Builds an IXTable for the named table from the newest usable pieces: either the latest
        ///  already-built table or the files of the latest full crawl source, followed by the files of
        ///  every incremental crawl between that starting point and context.RequestedAsOfDateTime.
        ///  The as-of date of the newest piece included is written to context.NewestDependency.
        /// </summary>
        /// <param name="tableName">Name of the source/table to read</param>
        /// <param name="context">Context providing the StreamProvider and the requested as-of date</param>
        /// <returns>The combined table from ConcatenatedTable.Build, or null when the source has no versions at all</returns>
        /// <exception cref="UsageException">
        ///  Thrown when neither a full source nor a built table is found before the cutoff, or when the
        ///  built table cannot be reused and there is no full source to rebuild from.
        /// </exception>
        public IXTable ReadSource(string tableName, XDatabaseContext context)
        {
            List<IXTable> sources = new List<IXTable>();

            ItemVersions sourceVersions = context.StreamProvider.ItemVersions(LocationType.Source, tableName);

            // If there are no sources, there's nothing to rebuild from.
            // This is checked before LatestBeforeCutoff so a missing version list is never queried.
            if (sourceVersions.Versions == null || sourceVersions.Versions.Count == 0)
            {
                return null;
            }

            // Find the latest source of this type
            ItemVersion latestFullSource = sourceVersions.LatestBeforeCutoff(CrawlType.Full, context.RequestedAsOfDateTime);

            // Find the latest already converted table
            ItemVersions tableVersions = context.StreamProvider.ItemVersions(LocationType.Table, tableName);
            ItemVersion latestBuiltTable = tableVersions.LatestBeforeCutoff(CrawlType.Full, context.RequestedAsOfDateTime);

            // If no source or table was found, throw
            if (latestFullSource == null && latestBuiltTable == null)
            {
                throw new UsageException(tableName, "[Table]", context.StreamProvider.SourceNames());
            }

            // Read the Table or the Full Crawl Source, whichever is newer
            DateTime incrementalNeededAfterCutoff;

            if (latestBuiltTable != null &&
                (latestFullSource == null || !IsOutOfDate(latestBuiltTable.AsOfDate, latestFullSource.AsOfDate)) &&
                TableMetadataSerializer.UncachedExists(context.StreamProvider, latestBuiltTable.Path))
            {
                // If the table is current, reuse it
                sources.Add(BinaryTableReader.Build(context.StreamProvider, latestBuiltTable.Path));
                incrementalNeededAfterCutoff = latestBuiltTable.AsOfDate;
            }
            else
            {
                // This branch is also reached when a built table version is listed but UncachedExists
                // returned false. If there is no full crawl either, there is nothing to read; fail with
                // the same usage error as above rather than dereferencing a null source.
                if (latestFullSource == null)
                {
                    throw new UsageException(tableName, "[Table]", context.StreamProvider.SourceNames());
                }

                // Otherwise, build a new table from the latest source full crawl
                sources.AddRange(context.StreamProvider.Enumerate(latestFullSource.Path, EnumerateTypes.File, true).Select((sa) => new TabularFileReader(context.StreamProvider, sa.Path)));
                incrementalNeededAfterCutoff = latestFullSource.AsOfDate;
            }

            // Add incremental crawls between the full source and the reporting date
            DateTime latestComponent = incrementalNeededAfterCutoff;

            foreach (ItemVersion incrementalCrawl in sourceVersions.VersionsInRange(CrawlType.Inc, incrementalNeededAfterCutoff, context.RequestedAsOfDateTime))
            {
                sources.AddRange(context.StreamProvider.Enumerate(incrementalCrawl.Path, EnumerateTypes.File, true).Select((sa) => new TabularFileReader(context.StreamProvider, sa.Path)));
                latestComponent = latestComponent.BiggestOf(incrementalCrawl.AsOfDate);
            }

            // Report the latest incorporated source back
            context.NewestDependency = latestComponent;

            // Return the source (if a single) or concatenated group (if multiple parts)
            return ConcatenatedTable.Build(sources);
        }
        /// <summary>
        ///  WrapParallel applies a query stage builder to each source of a ConcatenatedTable separately
        ///  and concatenates the results. A source which is not a ConcatenatedTable is passed to the
        ///  builder directly.
        /// </summary>
        /// <remarks>
        ///  WrapParallel is only valid for verbs whose output over the concatenated rows of many sources
        ///  equals the concatenation of the outputs produced by running the verb on each source alone.
        /// </remarks>
        /// <param name="source">IXTable to wrap</param>
        /// <param name="parser">Parser which is rewound to its current position before each per-source builder call</param>
        /// <param name="builder">Wrapping function</param>
        /// <returns>Wrapped IXTable</returns>
        public static IXTable WrapParallel(this IXTable source, XqlParser parser, Func<IXTable, IXTable> builder)
        {
            ConcatenatedTable concatenated = source as ConcatenatedTable;

            // Single source: just wrap it
            if (concatenated == null)
            {
                return builder(source);
            }

            // Remember where the parser is now; every builder call starts again from here
            // (presumably so each copy of the stage parses the same arguments).
            Position stageStart = parser.CurrentPosition;

            // Left as a deferred query, so the builder runs as ConcatenatedTable.Build enumerates it
            var wrappedParts = concatenated.Sources.Select((part) =>
            {
                parser.RewindTo(stageStart);
                return builder(part);
            });

            return ConcatenatedTable.Build(wrappedParts);
        }
Example #4
0
        /// <summary>
        ///  Builds a table covering a time range: parses an interval and a table name from
        ///  context.Parser, then builds the table once per full version found in the range (as of one
        ///  second before the next full version) plus once as of the end of the range, and
        ///  concatenates the results.
        /// </summary>
        /// <param name="source">Must be null; this stage has to be first in the pipeline</param>
        /// <param name="context">Context providing the Parser, StreamProvider, Runner and requested as-of date</param>
        /// <returns>The table returned by ConcatenatedTable.Build for the versions found</returns>
        /// <exception cref="ArgumentException">
        ///  Thrown when source is not null, when the interval is negative, or when the name has no
        ///  versions as either a Source or a Table.
        /// </exception>
        public IXTable Build(IXTable source, XDatabaseContext context)
        {
            if (source != null)
            {
                throw new ArgumentException("'read' must be the first stage in a pipeline.");
            }

            List<IXTable> sources = new List<IXTable>();

            // Identify the interval and table name requested
            TimeSpan interval = context.Parser.NextTimeSpan();

            if (interval < TimeSpan.Zero)
            {
                throw new ArgumentException("'interval' must be positive. (For last 7 days, use '7d')");
            }

            string tableName = (string)context.Parser.NextLiteralValue();

            // Determine the range of versions to include (from the as of date or now if not provided)
            DateTime rangeEnd = (context.RequestedAsOfDateTime == DateTime.MaxValue ? DateTime.UtcNow : context.RequestedAsOfDateTime);
            DateTime rangeStart = rangeEnd.Subtract(interval);

            // Find versions available, looking at Sources first and falling back to Tables.
            // A null version list is treated the same as an empty one, so a missing item reaches
            // the fallback and the "not found" error instead of a NullReferenceException.
            ItemVersions versions = context.StreamProvider.ItemVersions(LocationType.Source, tableName);

            if (versions.Versions == null || versions.Versions.Count == 0)
            {
                versions = context.StreamProvider.ItemVersions(LocationType.Table, tableName);
            }

            if (versions.Versions == null || versions.Versions.Count == 0)
            {
                throw new ArgumentException($"'{tableName}' was not found as a Source or Table.");
            }

            // Find the first version to include (if any) - the last full version before the start was 'current' at the start moment
            ItemVersion previous = versions.LatestBeforeCutoff(CrawlType.Full, rangeStart);

            XDatabaseContext historicalContext;

            foreach (ItemVersion version in versions.VersionsInRange(CrawlType.Full, rangeStart, rangeEnd))
            {
                // Add the version before this one, if any (including any incremental pieces),
                // by building as of one second before this version's as-of date
                if (previous != null)
                {
                    historicalContext = new XDatabaseContext(context);
                    historicalContext.RequestedAsOfDateTime = version.AsOfDate.AddSeconds(-1);
                    sources.Add(context.Runner.Build(tableName, historicalContext));
                }

                previous = version;
            }

            // Add 'last' up to the requested moment
            historicalContext = new XDatabaseContext(context);
            historicalContext.RequestedAsOfDateTime = rangeEnd;
            sources.Add(context.Runner.Build(tableName, historicalContext));

            // Communicate the latest component as of date back to the builder
            // (only the context used for the final build is popped into the outer context)
            historicalContext.Pop(context);

            // Return the source(s) found
            return ConcatenatedTable.Build(sources);
        }