Example #1
0
 /// <summary>
 ///  Create a WorkflowRunner for the given database context, with new caches
 ///  and a case-insensitive set of source names.
 /// </summary>
 /// <param name="context">XDatabaseContext this runner keeps a reference to</param>
 public WorkflowRunner(XDatabaseContext context)
 {
     this.XDatabaseContext = context;

     // Source names are matched ignoring case
     _sources = new HashSet<string>(StringComparer.OrdinalIgnoreCase);

     // Caches for item versions and for the latest table found per cutoff
     _versionCache = new Cache<ItemVersions>();
     _currentTableVersions = new Cache<LatestTableForCutoff>();
 }
Example #2
0
        /// <summary>
        ///  Build the requested table through the context's Runner and make sure an output in the
        ///  requested format exists, returning the path of that output.
        /// </summary>
        /// <param name="tableName">Name of the table to build</param>
        /// <param name="context">Context providing the Runner and StreamProvider; NewestDependency is reset here and RebuiltSomething is set if a report is written</param>
        /// <param name="outputFormat">Output format (used as the report file extension); null or empty is treated as "xform"</param>
        /// <returns>Path of the binary table or report file for the requested format</returns>
        public static string Build(string tableName, XDatabaseContext context, string outputFormat)
        {
            // Null or empty means the default binary format. Normalize once so the report path and
            // writer selection below never dereference null or produce a "Report." path with no extension.
            if (String.IsNullOrEmpty(outputFormat))
            {
                outputFormat = "xform";
            }

            bool isBinaryFormat = outputFormat.Equals("xform", StringComparison.OrdinalIgnoreCase);

            IXTable builder = null;

            try
            {
                Stopwatch w = Stopwatch.StartNew();

                // Reset the dependency DateTime check
                context.NewestDependency = DateTime.MinValue;

                // Recursively build dependencies and return a reader for the result table
                builder = context.Runner.Build(tableName, context);

                string outputPath = null;

                if (isBinaryFormat && builder is BinaryTableReader)
                {
                    // If the binary format was requested and we already built it, return the path written
                    outputPath = ((BinaryTableReader)builder).TablePath;
                }
                else
                {
                    ItemVersion latestReportVersion = context.StreamProvider.ItemVersions(LocationType.Report, tableName).LatestBeforeCutoff(CrawlType.Full, context.RequestedAsOfDateTime);
                    if (latestReportVersion != null)
                    {
                        outputPath = Path.Combine(context.StreamProvider.Path(LocationType.Report, tableName, CrawlType.Full, latestReportVersion.AsOfDate), $"Report.{outputFormat}");
                    }

                    // outputPath is only consulted when latestReportVersion is non-null (short-circuit), so it is never null here
                    if (latestReportVersion == null || latestReportVersion.AsOfDate < context.NewestDependency || context.RebuiltSomething || !context.StreamProvider.Attributes(outputPath).Exists)
                    {
                        // If the report needs to be rebuilt, make it and return the path
                        outputPath = Path.Combine(context.StreamProvider.Path(LocationType.Report, tableName, CrawlType.Full, context.NewestDependency), $"Report.{outputFormat}");
                        if (isBinaryFormat)
                        {
                            BinaryTableWriter.Build(builder, context, outputPath).RunAndDispose();
                        }
                        else
                        {
                            new TabularFileWriter(builder, context.StreamProvider, outputPath).RunAndDispose();
                        }

                        context.RebuiltSomething = true;
                    }
                }

                w.Stop();
                Trace.WriteLine($"Done. \"{outputPath}\" {(context.RebuiltSomething ? "written" : "up-to-date")} in {w.Elapsed.ToFriendlyString()}.");
                return(outputPath);
            }
            finally
            {
                // Always release the builder, whether or not it was run
                if (builder != null)
                {
                    builder.Dispose();
                    builder = null;
                }
            }
        }
Example #3
0
 /// <summary>
 ///  Report what this context tracked back to an outer context: the outer NewestDependency
 ///  becomes the BiggestOf both values, and this RebuiltSomething is OR-ed into the outer flag.
 /// </summary>
 /// <param name="outer">Context to update; when null, nothing is done</param>
 public void Pop(XDatabaseContext outer)
 {
     // No outer context means there is nobody to report to
     if (outer == null)
     {
         return;
     }

     outer.NewestDependency = outer.NewestDependency.BiggestOf(this.NewestDependency);
     outer.RebuiltSomething = outer.RebuiltSomething | this.RebuiltSomething;
 }
Example #4
0
        /// <summary>
        ///  Create an InteractiveRunner over the given context with no pipeline,
        ///  no stages, and no commands yet.
        /// </summary>
        /// <param name="context">XDatabaseContext this runner keeps a reference to</param>
        public InteractiveRunner(XDatabaseContext context)
        {
            // File path with %TEMP% expanded from the environment
            // NOTE(review): presumably where the most recent commands are saved - confirm against usage
            s_commandCachePath = Environment.ExpandEnvironmentVariables(@"%TEMP%\XForm.Last.xql");

            _xDatabaseContext = context;

            // Start with an empty session
            _commands = new List<string>();
            _stages = new List<IXTable>();
            _pipeline = null;
        }
Example #5
0
        /// <summary>
        ///  Build a reader for a simple source: either the latest already-built table or the files of
        ///  the latest full crawl, followed by the files of every incremental crawl up to the requested date.
        /// </summary>
        /// <param name="tableName">Name of the source/table to read</param>
        /// <param name="context">Context providing the StreamProvider and RequestedAsOfDateTime; NewestDependency is set to the newest component included</param>
        /// <returns>ConcatenatedTable.Build over the collected readers, or null if the source has no versions at all</returns>
        /// <exception cref="UsageException">No full crawl exists before the cutoff and no usable built table exists either</exception>
        public IXTable ReadSource(string tableName, XDatabaseContext context)
        {
            // Find the versions of this source
            ItemVersions sourceVersions = context.StreamProvider.ItemVersions(LocationType.Source, tableName);

            // If there are no sources, there's nothing to rebuild from.
            // Check this before asking for the latest version so an empty/null version list is never queried.
            if (sourceVersions.Versions == null || sourceVersions.Versions.Count == 0)
            {
                return(null);
            }

            // Find the latest full crawl of this source
            ItemVersion latestFullSource = sourceVersions.LatestBeforeCutoff(CrawlType.Full, context.RequestedAsOfDateTime);

            // Find the latest already converted table
            ItemVersions tableVersions    = context.StreamProvider.ItemVersions(LocationType.Table, tableName);
            ItemVersion  latestBuiltTable = tableVersions.LatestBeforeCutoff(CrawlType.Full, context.RequestedAsOfDateTime);

            // If no source or table was found, throw
            if (latestFullSource == null && latestBuiltTable == null)
            {
                throw new UsageException(tableName, "[Table]", context.StreamProvider.SourceNames());
            }

            List <IXTable> sources = new List <IXTable>();

            // Read the Table or the Full Crawl Source, whichever is newer
            DateTime incrementalNeededAfterCutoff;

            bool canReuseTable =
                latestBuiltTable != null &&
                (latestFullSource == null || !IsOutOfDate(latestBuiltTable.AsOfDate, latestFullSource.AsOfDate)) &&
                TableMetadataSerializer.UncachedExists(context.StreamProvider, latestBuiltTable.Path);

            if (canReuseTable)
            {
                // If the table is current, reuse it
                sources.Add(BinaryTableReader.Build(context.StreamProvider, latestBuiltTable.Path));
                incrementalNeededAfterCutoff = latestBuiltTable.AsOfDate;
            }
            else
            {
                // A table version was listed but can't be used (its metadata is missing), and there is
                // no full crawl to rebuild from: report it as a usage problem rather than dereferencing null.
                if (latestFullSource == null)
                {
                    throw new UsageException(tableName, "[Table]", context.StreamProvider.SourceNames());
                }

                // Otherwise, build a new table from the latest source full crawl
                sources.AddRange(context.StreamProvider.Enumerate(latestFullSource.Path, EnumerateTypes.File, true).Select((sa) => new TabularFileReader(context.StreamProvider, sa.Path)));
                incrementalNeededAfterCutoff = latestFullSource.AsOfDate;
            }

            // Add incremental crawls between the full source and the reporting date
            DateTime latestComponent = incrementalNeededAfterCutoff;

            foreach (ItemVersion incrementalCrawl in sourceVersions.VersionsInRange(CrawlType.Inc, incrementalNeededAfterCutoff, context.RequestedAsOfDateTime))
            {
                sources.AddRange(context.StreamProvider.Enumerate(incrementalCrawl.Path, EnumerateTypes.File, true).Select((sa) => new TabularFileReader(context.StreamProvider, sa.Path)));
                latestComponent = latestComponent.BiggestOf(incrementalCrawl.AsOfDate);
            }

            // Report the latest incorporated source back
            context.NewestDependency = latestComponent;

            // Return the source (if a single) or concatenated group (if multiple parts)
            return(ConcatenatedTable.Build(sources));
        }
Example #6
0
        /// <summary>
        ///  Run a query and write the (optionally row/column limited) result to the response
        ///  in the requested format.
        /// </summary>
        /// <param name="query">Query text to run</param>
        /// <param name="format">Output format name passed to WriterForFormat</param>
        /// <param name="rowCountLimit">Row limit; a Limit stage is added when this is zero or more</param>
        /// <param name="colCountLimit">Column limit; a Limit stage is added when this is above zero</param>
        /// <param name="asOfDate">Moment in time to query as of</param>
        /// <param name="response">Response the writer is built over</param>
        private void Run(string query, string format, int rowCountLimit, int colCountLimit, DateTime asOfDate, IHttpResponse response)
        {
            IXTable pipeline = null;

            try
            {
                // Reuse the shared context unless another moment in time was requested
                XDatabaseContext context = (asOfDate == _xDatabaseContext.RequestedAsOfDateTime)
                    ? _xDatabaseContext
                    : new XDatabaseContext(_xDatabaseContext) { RequestedAsOfDateTime = asOfDate };

                // No pipeline means there was no query; leave the response empty
                pipeline = context.Query(query);
                if (pipeline == null)
                {
                    return;
                }

                // Wrap in a Limit only when a row or column limit was requested
                bool limitRequested = (rowCountLimit >= 0 || colCountLimit > 0);
                if (limitRequested)
                {
                    pipeline = new Verbs.Limit(pipeline, rowCountLimit, colCountLimit);
                }

                // Write in the desired format; the finally block below does the Dispose
                pipeline = new TabularFileWriter(pipeline, WriterForFormat(format, response));
                pipeline.RunWithoutDispose();
            }
            catch (ColumnDataNotFoundException ex)
            {
                // Column data is missing: delete the table three folders above the column path to try to spur re-creating it.
                // NOTE: This logic will likely need to be updated when columns are downloaded remotely; multi-threaded scenarios will be complex.
                string tablePath = Path.Combine(ex.ColumnPath, @"..\..\..");
                TableMetadataSerializer.Delete(_xDatabaseContext.StreamProvider, tablePath);
            }
            finally
            {
                pipeline?.Dispose();
            }
        }
Example #7
0
        /// <summary>
        ///  Create the HTTP service: keep the context, build a QuerySuggester over it, and
        ///  create a BackgroundWebServer with one responder registered per supported action.
        /// </summary>
        /// <param name="xDatabaseContext">Context used by the suggester and the request handlers</param>
        public HttpService(XDatabaseContext xDatabaseContext)
        {
            _xDatabaseContext = xDatabaseContext;
            _suggester = new QuerySuggester(xDatabaseContext);

            // NOTE(review): arguments look like port, default document, and content folder - confirm against BackgroundWebServer
            _server = new BackgroundWebServer(5073, "index.html", "Web");

            // Register a handler for each action name
            _server.AddResponder("suggest", Suggest);
            _server.AddResponder("run", Run);
            _server.AddResponder("download", Download);
            _server.AddResponder("count", CountWithinTimeout);
            _server.AddResponder("save", Save);
            _server.AddResponder("test", Test);
        }
Example #8
0
        /// <summary>
        ///  Run a query until it finishes or the timeout expires and write a single JSON row
        ///  with the row count, whether the run completed, and the runtime in milliseconds.
        /// </summary>
        /// <param name="query">Query text to run</param>
        /// <param name="timeout">Time allowed for the run; replaced with TimeSpan.MaxValue while a debugger is attached</param>
        /// <param name="asOfDate">Moment in time to query as of</param>
        /// <param name="response">Response the JSON writer is built over</param>
        private void CountWithinTimeout(string query, TimeSpan timeout, DateTime asOfDate, IHttpResponse response)
        {
            IXTable pipeline = null;

            try
            {
                // Reuse the shared context unless another moment in time was requested
                XDatabaseContext context = (asOfDate == _xDatabaseContext.RequestedAsOfDateTime)
                    ? _xDatabaseContext
                    : new XDatabaseContext(_xDatabaseContext) { RequestedAsOfDateTime = asOfDate };

                // No pipeline means there was no query; leave the response empty
                pipeline = context.Query(query);
                if (pipeline == null)
                {
                    return;
                }

                // Don't time out while stepping through in a debugger
                TimeSpan effectiveTimeout = (Debugger.IsAttached ? TimeSpan.MaxValue : timeout);
                RunResult result = pipeline.RunUntilTimeout(effectiveTimeout);

                // Emit the single result row
                using (ITabularWriter writer = WriterForFormat("json", response))
                {
                    writer.SetColumns(new string[] { "Count", "IsComplete", "RuntimeMs" });
                    writer.Write(result.RowCount);
                    writer.Write(result.IsComplete);
                    writer.Write((int)result.Elapsed.TotalMilliseconds);
                    writer.NextRow();
                }
            }
            finally
            {
                pipeline?.Dispose();
            }
        }
Example #9
0
        /// <summary>
        ///  Create a context which copies the Runner, StreamProvider, Logger, Parser, CurrentTable,
        ///  CurrentQuery, RequestedAsOfDateTime and ForceSingleThreaded settings of another context.
        ///  Other members keep whatever the default constructor set.
        /// </summary>
        /// <param name="copyFrom">Context to copy from; when null, only the default constructor runs</param>
        public XDatabaseContext(XDatabaseContext copyFrom) : this()
        {
            if (copyFrom == null)
            {
                return;
            }

            // Shared services
            this.Runner         = copyFrom.Runner;
            this.StreamProvider = copyFrom.StreamProvider;
            this.Logger         = copyFrom.Logger;
            this.Parser         = copyFrom.Parser;

            // What is being built, and as of when
            this.CurrentTable          = copyFrom.CurrentTable;
            this.CurrentQuery          = copyFrom.CurrentQuery;
            this.RequestedAsOfDateTime = copyFrom.RequestedAsOfDateTime;

            // Execution options
            this.ForceSingleThreaded = copyFrom.ForceSingleThreaded;
        }
Example #10
0
        /// <summary>
        ///  Read a query from a file, run it, and print how many rows were written.
        /// </summary>
        /// <param name="queryFilePath">Path of the file containing the query text</param>
        /// <param name="context">Context used to build the query pipeline</param>
        /// <returns>Value returned by running the pipeline (reported as rows written), or zero if the query produced no pipeline</returns>
        private static long RunFileQuery(string queryFilePath, XDatabaseContext context)
        {
            string query = File.ReadAllText(queryFilePath);

            long rowsWritten = 0;

            using (new TraceWatch(query))
            {
                using (IXTable source = context.Query(query))
                {
                    // Query can produce no pipeline (for example, when the file holds no query);
                    // report zero rows instead of failing with a NullReferenceException.
                    if (source != null)
                    {
                        rowsWritten = source.RunWithoutDispose();
                    }
                }
            }

            Console.WriteLine($"Done. {rowsWritten:n0} rows written.");
            return(rowsWritten);
        }
        /// <summary>
        ///  Create the comparison fixture: keep the context and fill two 50M element arrays -
        ///  Values with random numbers from 0-999 and Thresholds with a constant 50.
        /// </summary>
        /// <param name="context">XDatabaseContext kept in the Context member</param>
        public PerformanceComparisons(XDatabaseContext context)
        {
            Context = context;
            Count = 50 * 1000 * 1000;

            Random random = new Random();

            // Fill local arrays first, then publish them
            ushort[] values     = new ushort[Count];
            ushort[] thresholds = new ushort[Count];

            for (int index = 0; index < values.Length; ++index)
            {
                values[index]     = (ushort)random.Next(1000);
                thresholds[index] = 50;
            }

            Values     = values;
            Thresholds = thresholds;
        }
Example #12
0
        /// <summary>
        ///  Run a query and write the (optionally row/column limited) result to the response
        ///  in the requested format.
        /// </summary>
        /// <param name="query">Query text to run</param>
        /// <param name="format">Output format name passed to WriterForFormat</param>
        /// <param name="rowCountLimit">Row limit; a Limit stage is added when this is zero or more</param>
        /// <param name="colCountLimit">Column limit; a Limit stage is added when this is above zero</param>
        /// <param name="asOfDate">Moment in time to query as of</param>
        /// <param name="response">Response the writer is built over</param>
        private void Run(string query, string format, int rowCountLimit, int colCountLimit, DateTime asOfDate, IHttpResponse response)
        {
            IXTable pipeline = null;

            try
            {
                XDatabaseContext context = _xDatabaseContext;

                // Build for another moment in time if requested
                if (asOfDate != _xDatabaseContext.RequestedAsOfDateTime)
                {
                    context = new XDatabaseContext(_xDatabaseContext)
                    {
                        RequestedAsOfDateTime = asOfDate
                    };
                }

                // Build a Pipeline for the Query
                pipeline = context.Query(query);

                // If there was no query, return an empty result rather than wrapping a null pipeline
                if (pipeline == null)
                {
                    return;
                }

                // Restrict the row and column count if requested
                if (rowCountLimit >= 0 || colCountLimit > 0)
                {
                    pipeline = new Verbs.Limit(pipeline, rowCountLimit, colCountLimit);
                }

                // Build a writer for the desired format
                using (ITabularWriter writer = WriterForFormat(format, response))
                {
                    // Run the query and return the output
                    pipeline = new TabularFileWriter(pipeline, writer);
                    pipeline.RunAndDispose();
                }
            }
            finally
            {
                if (pipeline != null)
                {
                    pipeline.Dispose();
                    pipeline = null;
                }
            }
        }
Example #13
0
        /// <summary>
        ///  Generate builds a sample table ("HugeSample") with a huge number of tiny rows for scale testing.
        /// </summary>
        /// <remarks>
        ///     [Segment]      - ushort looping from 0-65,534
        ///     [Status]       - one of five string values, cycling in order
        ///     [WasEncrypted] - true for one row in three
        ///
        ///     Original author's sizing estimate: each row is 4b in binary format and ~20b as a CSV row,
        ///     so 5B rows is ~20GB in binary format and a ~100GB CSV.
        /// </remarks>
        /// <param name="rowCount">Number of rows to generate</param>
        /// <param name="context">Context whose StreamProvider determines where the table is written</param>
        public static void Generate(long rowCount, XDatabaseContext context)
        {
            // Note: this changes a static setting on BinaryTableWriter and does not restore it
            BinaryTableWriter.ColumnFileSizeLimit = 1 * 1024 * 1024 * 1024;

            // The five status values, copied into a String8Block
            String8Block block = new String8Block();
            String8[] statuses = new [] { "New", "Active", "Completed", "In Progress", "Blocked" }.Select((value) => block.GetCopy(value)).ToArray();

            // Table is written as a Full crawl as of today (UTC)
            string tablePath = context.StreamProvider.Path(LocationType.Table, "HugeSample", CrawlType.Full, DateTime.UtcNow.Date);

            using (new TraceWatch($"Generating HugeSample \r\n  with {rowCount:n0} rows\r\n  to {context.StreamProvider.Description}\\{tablePath}..."))
            {
                // One array per column, each ushort.MaxValue items long
                var segments     = Enumerable.Range(0, ushort.MaxValue).Select((i) => (ushort)i).ToArray();
                var rowStatuses  = Enumerable.Range(0, ushort.MaxValue).Select((i) => statuses[i % statuses.Length]).ToArray();
                var wasEncrypted = Enumerable.Range(0, ushort.MaxValue).Select((i) => (i % 3) == 1).ToArray();

                IXTable table = new RepeatingArrayTable(rowCount, ushort.MaxValue)
                                .WithColumn("Segment", segments)
                                .WithColumn("Status", rowStatuses)
                                .WithColumn("WasEncrypted", wasEncrypted)
                                .Query($@"write ""{tablePath}""", context);

                table.RunAndDispose();
            }
        }
Example #14
0
        /// <summary>
        ///  Program entry point: build a context over the current directory, apply the
        ///  "+cache" and "-parallel" switches, and hand the arguments to Run.
        /// </summary>
        /// <param name="args">Command line arguments</param>
        /// <returns>Value returned by Run</returns>
        public static int Main(string[] args)
        {
            XDatabaseContext context = new XDatabaseContext
            {
                RequestedAsOfDateTime = DateTime.MaxValue,
                StreamProvider = new StreamProviderCache(new LocalFileStreamProvider(Environment.CurrentDirectory))
            };

            // The runner needs the context, so it is attached after construction
            context.Runner = new WorkflowRunner(context);

            // Switches can appear anywhere in the argument list
            foreach (string arg in args)
            {
                if (arg.Equals("+cache", StringComparison.OrdinalIgnoreCase))
                {
                    ColumnCache.IsEnabled = true;
                }
                else if (arg.Equals("-parallel", StringComparison.OrdinalIgnoreCase))
                {
                    context.ForceSingleThreaded = true;
                }
            }

            return Run(args, context);
        }
        /// <summary>
        ///  Benchmark the "choose Max [Rank] [ID]" query over 900,000 rows
        ///  (100,000 distinct IDs with nine rows each).
        /// </summary>
        public void Choose()
        {
            XDatabaseContext context = new XDatabaseContext();

            // Rank values repeated for each group of nine rows
            int[] rankPattern = new int[] { 2, 3, 1, 4, 6, 5, 7, 9, 8 };

            int distinctCount = 100000;
            int countPerID    = 9;
            int length        = countPerID * distinctCount;

            // Build the ID, Rank, and Value columns
            int[] id    = new int[length];
            int[] rank  = new int[length];
            int[] value = new int[length];

            for (int row = 0; row < length; ++row)
            {
                id[row]    = row / countPerID;               // ID is the same for countPerID rows at a time
                rank[row]  = rankPattern[row % countPerID];  // Rank follows rankPattern, repeating for each ID
                value[row] = row;                            // Value is the index of the row
            }

            using (Benchmarker b = new Benchmarker($"Choose [{length:n0}]", 3 * DefaultMeasureMilliseconds))
            {
                b.Measure("Choose", length, () =>
                {
                    IXTable chosen = Context.FromArrays(length)
                                     .WithColumn("ID", id)
                                     .WithColumn("Rank", rank)
                                     .WithColumn("Value", value)
                                     .Query("choose Max [Rank] [ID]", context);

                    return chosen.Count();
                });
            }
        }
Example #16
0
 /// <summary>
 ///  Build the table through the inner runner, passing true as the third argument
 ///  to ask it to defer computing dependencies now.
 /// </summary>
 /// <param name="tableName">Name of the table to build</param>
 /// <param name="context">Context to build with</param>
 /// <returns>Whatever the inner runner's Build returns</returns>
 public IXTable Build(string tableName, XDatabaseContext context) => _inner.Build(tableName, context, true);
Example #17
0
 /// <summary>
 ///  Create a WorkflowRunner for the given database context with new caches.
 /// </summary>
 /// <param name="context">XDatabaseContext this runner keeps a reference to</param>
 public WorkflowRunner(XDatabaseContext context)
 {
     this.XDatabaseContext = context;

     // Caches for the latest table found per cutoff and for item versions
     _currentTableVersions = new Cache<LatestTableForCutoff>();
     _versionCache = new Cache<ItemVersions>();
 }
Example #18
0
 /// <summary>
 ///  Build the named source by delegating to the Runner of this instance's own context.
 /// </summary>
 /// <param name="sourceName">Name of the source to build</param>
 /// <param name="context">Context passed through to the Runner</param>
 /// <returns>Whatever the Runner's Build returns</returns>
 public IXTable Build(string sourceName, XDatabaseContext context) => _xDatabaseContext.Runner.Build(sourceName, context);
Example #19
0
        /// <summary>
        ///  Build the named table as of outerContext.RequestedAsOfDateTime, or find the copy already
        ///  built, and return a table to read it.
        /// </summary>
        /// <remarks>
        ///  A saved query (LocationType.Query) is returned as a pipeline without writing a table.
        ///  Otherwise the table is rebuilt when there is no built version, a dependency was rebuilt,
        ///  the query text changed, the built version is out of date, or its metadata is missing.
        /// </remarks>
        /// <param name="tableName">Name of the table, source, or query to build</param>
        /// <param name="outerContext">Caller's context; RequestedAsOfDateTime may be moved to the end of the day, and NewestDependency / RebuiltSomething are updated</param>
        /// <param name="deferred">True to return the builder pipeline instead of running it when a rebuild is needed</param>
        /// <returns>A reader over the built table, the query pipeline for a saved query, or the unrun builder when deferred</returns>
        /// <exception cref="UsageException">The name is not a known table, or neither a built table nor a way to build it exists</exception>
        public IXTable Build(string tableName, XDatabaseContext outerContext, bool deferred)
        {
            // Validate the source name is recognized
            if (!Sources.Contains(tableName))
            {
                // If it wasn't in cache, check individually for it live
                if (!XDatabaseContext.StreamProvider.ContainsTable(tableName))
                {
                    throw new UsageException(tableName, "Table", Sources);
                }

                // If found, update the cache
                UpdateSources();
            }

            // If only a Date was passed for AsOfDate, look for the last version as of that day
            if (outerContext.RequestedAsOfDateTime.TimeOfDay == TimeSpan.Zero)
            {
                outerContext.RequestedAsOfDateTime = outerContext.RequestedAsOfDateTime.AddDays(1).AddSeconds(-1);
            }

            // If we previously found the latest for this table, just return it again
            LatestTableForCutoff previousLatest;

            if (_currentTableVersions.TryGet(tableName, out previousLatest) &&
                previousLatest.Cutoff >= outerContext.RequestedAsOfDateTime &&
                previousLatest.TableVersion.AsOfDate <= outerContext.RequestedAsOfDateTime &&
                TableMetadataSerializer.UncachedExists(outerContext.StreamProvider, previousLatest.TableVersion.Path))
            {
                outerContext.NewestDependency = previousLatest.TableVersion.AsOfDate;
                return(BinaryTableReader.Build(outerContext.StreamProvider, previousLatest.TableVersion.Path));
            }

            // Create a context to track what we're building now
            XDatabaseContext innerContext = XDatabaseContext.Push(outerContext);

            innerContext.CurrentTable = tableName;

            // If this is a query, there won't be a cached table - just build a pipeline to make it
            StreamAttributes queryAttributes = innerContext.StreamProvider.Attributes(innerContext.StreamProvider.Path(LocationType.Query, tableName, ".xql"));

            if (queryAttributes.Exists)
            {
                IXTable queryPipeline = innerContext.Query(innerContext.StreamProvider.ReadAllText(queryAttributes.Path));
                innerContext.Pop(outerContext);
                return(queryPipeline);
            }

            // Find the latest already built result, and associated query
            ItemVersions tableVersions    = innerContext.StreamProvider.ItemVersions(LocationType.Table, tableName);
            ItemVersion  latestTable      = tableVersions.LatestBeforeCutoff(CrawlType.Full, outerContext.RequestedAsOfDateTime);
            string       latestTableQuery = null;

            if (latestTable != null)
            {
                latestTableQuery = TableMetadataSerializer.Read(outerContext.StreamProvider, latestTable.Path).Query;
            }

            // Set the dependency date to the latest table we've already built (if any)
            innerContext.NewestDependency = (latestTable == null ? DateTime.MinValue : latestTable.AsOfDate);

            // Determine the XQL to build the table and construct a builder which can do so
            string  xql;
            IXTable builder;

            // Find the config to build the table and scan dependency versions to determine whether table is out-of-date
            StreamAttributes configAttributes = innerContext.StreamProvider.Attributes(innerContext.StreamProvider.Path(LocationType.Config, tableName, ".xql"));

            if (!configAttributes.Exists)
            {
                // If this is a simple source, just reading it is how to build it
                xql = $"read {XqlScanner.Escape(tableName, TokenType.Value)}";

                // Build a reader concatenating all needed pieces
                builder = ReadSource(tableName, innerContext);
            }
            else
            {
                // If there is a config, the config is how to build it
                xql = innerContext.StreamProvider.ReadAllText(configAttributes.Path);

                // Build a pipeline for the query, recursively creating dependencies
                builder = innerContext.Query(xql);
            }

            // If we don't have the table or the source, we have to throw
            if (latestTable == null && builder == null)
            {
                throw new UsageException(tableName, "Table", innerContext.StreamProvider.Tables());
            }

            // Get the path we're either reading or building
            string tablePath = innerContext.StreamProvider.Path(LocationType.Table, tableName, CrawlType.Full, innerContext.NewestDependency);

            // If we can rebuild this table and we need to (sources rebuilt, query changed, out-of-date, deleted), rebuild it
            if (builder != null)
            {
                // Same conditions, in the same short-circuit order, as before; latestTable is only dereferenced once known non-null
                bool needsRebuild =
                    latestTable == null ||
                    innerContext.RebuiltSomething ||
                    (latestTableQuery != null && xql != latestTableQuery) ||
                    IsOutOfDate(latestTable.AsOfDate, innerContext.NewestDependency) ||
                    !TableMetadataSerializer.UncachedExists(outerContext.StreamProvider, latestTable.Path);

                if (needsRebuild)
                {
                    // If we're not running now, just return how to build it
                    if (deferred)
                    {
                        return(builder);
                    }

                    // Otherwise, build it now; we'll return the query to read the output
                    innerContext.CurrentQuery = xql;
                    Trace.WriteLine($"COMPUTE: [{innerContext.NewestDependency.ToString(StreamProviderExtensions.DateTimeFolderFormat)}] {tableName}");
                    BinaryTableWriter.Build(builder, innerContext, tablePath).RunAndDispose();
                    innerContext.RebuiltSomething = true;
                }
                else
                {
                    // The existing table is current, so the builder will never be run or returned;
                    // dispose it here rather than abandoning it
                    builder.Dispose();
                }
            }

            // Report the newest dependency in this chain to the components above
            innerContext.Pop(outerContext);

            _currentTableVersions.Add(tableName, new LatestTableForCutoff(outerContext.RequestedAsOfDateTime, new ItemVersion(LocationType.Table, tableName, CrawlType.Full, innerContext.NewestDependency)));
            return(BinaryTableReader.Build(innerContext.StreamProvider, tablePath));
        }
Example #20
0
 /// <summary>
 ///  Build the named table with deferred = false.
 /// </summary>
 /// <param name="tableName">Name of the table to build</param>
 /// <param name="outerContext">Caller's context, passed through unchanged</param>
 /// <returns>Whatever the three-argument Build returns</returns>
 public IXTable Build(string tableName, XDatabaseContext outerContext) => Build(tableName, outerContext, false);
Example #21
0
        /// <summary>
        ///  Run the XForm mode named by the first argument ("run", "add", "clean", "build",
        ///  "http"/"web", "perf"), or the interactive runner when no arguments are passed.
        /// </summary>
        /// <param name="args">Command line arguments; args[0] is the mode</param>
        /// <param name="context">Context providing the StreamProvider and Runner</param>
        /// <returns>
        ///  The interactive runner's or file query's result cast to int for those modes, zero for the
        ///  other modes on success; -2 after a UsageException and -1 after any other exception
        ///  (exceptions are only caught when no debugger is attached).
        /// </returns>
        public static int Run(string[] args, XDatabaseContext context)
        {
            try
            {
                // Enable native acceleration by default
                NativeAccelerator.Enable();

                // No arguments: go interactive
                if (args == null || args.Length == 0)
                {
                    return (int)new InteractiveRunner(context).Run();
                }

                string command = args[0].ToLowerInvariant();
                switch (command)
                {
                case "run":
                    if (args.Length < 2)
                    {
                        throw new UsageException("'run' [QueryFilePath]");
                    }

                    return (int)RunFileQuery(args[1], context);

                case "add":
                    if (args.Length < 3)
                    {
                        throw new UsageException("'add' [SourceFileOrDirectory] [AsSourceName] [Full|Incremental?] [AsOfDateTimeUtc?]");
                    }

                    // Optional arguments fall back to a Full crawl as of DateTime.MinValue
                    var addCrawlType = ParseCrawlTypeOrDefault(args, 3, CrawlType.Full);
                    var addAsOfDate  = ParseDateTimeOrDefault(args, 4, DateTime.MinValue);
                    context.StreamProvider.Add(args[1], args[2], addCrawlType, addAsOfDate);

                    Console.WriteLine($"Done. \"{args[1]}\" added as Source \"{args[2]}\".");
                    return 0;

                case "clean":
                    Console.WriteLine("Cleaning Production folder...");

                    var cleanFlag   = ParseBooleanOrDefault(args, 1, true);
                    var cleanCutoff = ParseDateTimeOrDefault(args, 2, default(DateTime));
                    context.StreamProvider.Clean(cleanFlag, cleanCutoff);

                    Console.WriteLine("Done. Clean pass complete.");
                    return 0;

                case "build":
                    if (args.Length < 2)
                    {
                        throw new UsageException($"'build' [Table] [OutputFormat?] [AsOfDateTimeUtc?]", context.Runner.SourceNames);
                    }

                    // Output format defaults to the binary "xform" format
                    string buildFormat = (args.Length > 2 ? args[2] : "xform");
                    context.RequestedAsOfDateTime = ParseDateTimeOrDefault(args, 3, context.RequestedAsOfDateTime);

                    // The returned output path is not needed here
                    ReportWriter.Build(args[1], context, buildFormat);
                    return 0;

                case "http":
                case "web":
                    new HttpService(context).Run();
                    return 0;

                case "perf":
                    new PerformanceComparisons(context).Run();
                    return 0;

                default:
                    throw new UsageException($"Unknown XForm mode '{command}'.");
                }
            }
            catch (UsageException ex) when (!Debugger.IsAttached)
            {
                // Usage problems: show just the message
                Console.WriteLine(ex.Message);
                return -2;
            }
            catch (Exception ex) when (!Debugger.IsAttached)
            {
                // Anything else: show the full exception
                Console.WriteLine($"Error: {ex.ToString()}");
                return -1;
            }
        }
Example #22
0
 /// <summary>
 ///  Create a new context copied from an outer one by calling the copy constructor.
 /// </summary>
 /// <param name="outer">Context to copy from</param>
 /// <returns>The newly created context</returns>
 public static XDatabaseContext Push(XDatabaseContext outer) => new XDatabaseContext(outer);