/// <summary>
///  Create a WorkflowRunner for the given database context. The version caches
///  start empty and the set of known source names is case-insensitive.
/// </summary>
/// <param name="context">Database context this runner keeps a reference to</param>
public WorkflowRunner(XDatabaseContext context)
{
    this.XDatabaseContext = context;

    // Nothing is looked up here; every cache starts out empty
    _sources = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
    _currentTableVersions = new Cache<LatestTableForCutoff>();
    _versionCache = new Cache<ItemVersions>();
}
/// <summary>
///  Build the requested table (asking the context's Runner to build it and its
///  dependencies) and make sure a copy exists in the requested output format.
/// </summary>
/// <param name="tableName">Name of the table to build</param>
/// <param name="context">Database context; NewestDependency is reset here and RebuiltSomething is set if a report is written</param>
/// <param name="outputFormat">Output format, used as the report file extension; null or empty means "xform"</param>
/// <returns>Path of the binary table, or of the report file which was written or found up-to-date</returns>
public static string Build(string tableName, XDatabaseContext context, string outputFormat)
{
    // Treat a missing format as "xform" on every path. Previously only the fast path
    // did, so a null format threw NullReferenceException when a report had to be
    // written, and an empty format produced a "Report." file name.
    if (String.IsNullOrEmpty(outputFormat))
    {
        outputFormat = "xform";
    }

    bool isBinaryFormat = outputFormat.Equals("xform", StringComparison.OrdinalIgnoreCase);

    IXTable builder = null;

    try
    {
        Stopwatch w = Stopwatch.StartNew();

        // Reset the dependency DateTime check
        context.NewestDependency = DateTime.MinValue;

        // Recursively build dependencies and return a reader for the result table
        builder = context.Runner.Build(tableName, context);

        string outputPath = null;
        BinaryTableReader builtTable = builder as BinaryTableReader;

        if (isBinaryFormat && builtTable != null)
        {
            // If the binary format was requested and we already built it, return the path written
            outputPath = builtTable.TablePath;
        }
        else
        {
            ItemVersion latestReportVersion = context.StreamProvider
                .ItemVersions(LocationType.Report, tableName)
                .LatestBeforeCutoff(CrawlType.Full, context.RequestedAsOfDateTime);

            if (latestReportVersion != null)
            {
                outputPath = Path.Combine(
                    context.StreamProvider.Path(LocationType.Report, tableName, CrawlType.Full, latestReportVersion.AsOfDate),
                    $"Report.{outputFormat}");
            }

            // outputPath is only null when latestReportVersion is null, which short-circuits before Attributes() is reached
            if (latestReportVersion == null
                || latestReportVersion.AsOfDate < context.NewestDependency
                || context.RebuiltSomething
                || !context.StreamProvider.Attributes(outputPath).Exists)
            {
                // If the report needs to be rebuilt, make it and return the path
                outputPath = Path.Combine(
                    context.StreamProvider.Path(LocationType.Report, tableName, CrawlType.Full, context.NewestDependency),
                    $"Report.{outputFormat}");

                if (isBinaryFormat)
                {
                    BinaryTableWriter.Build(builder, context, outputPath).RunAndDispose();
                }
                else
                {
                    new TabularFileWriter(builder, context.StreamProvider, outputPath).RunAndDispose();
                }

                context.RebuiltSomething = true;
            }
        }

        w.Stop();
        Trace.WriteLine($"Done. \"{outputPath}\" {(context.RebuiltSomething ? "written" : "up-to-date")} in {w.Elapsed.ToFriendlyString()}.");
        return outputPath;
    }
    finally
    {
        if (builder != null)
        {
            builder.Dispose();
            builder = null;
        }
    }
}
/// <summary>
///  Report this context's build results to the context it was pushed from:
///  the outer NewestDependency becomes the bigger of the two values, and the
///  outer RebuiltSomething becomes true if this context rebuilt anything.
/// </summary>
/// <param name="outer">Context to report to; nothing happens when null</param>
public void Pop(XDatabaseContext outer)
{
    if (outer == null)
    {
        return;
    }

    DateTime newest = outer.NewestDependency.BiggestOf(this.NewestDependency);
    outer.NewestDependency = newest;

    outer.RebuiltSomething = outer.RebuiltSomething | this.RebuiltSomething;
}
/// <summary>
///  Create an InteractiveRunner with no pipeline, no stages and no commands yet.
/// </summary>
/// <remarks>
///  Also (re)assigns the static command cache path to "XForm.Last.xql" under %TEMP%.
/// </remarks>
/// <param name="context">Database context stored on this runner</param>
public InteractiveRunner(XDatabaseContext context)
{
    // NOTE: s_commandCachePath is assigned on every construction
    s_commandCachePath = Environment.ExpandEnvironmentVariables(@"%TEMP%\XForm.Last.xql");

    _xDatabaseContext = context;

    // Start with an empty session
    _commands = new List<string>();
    _stages = new List<IXTable>();
    _pipeline = null;
}
/// <summary>
///  Build a reader for a simple source table: either the latest already-built
///  table or the latest full crawl (whichever applies), followed by every
///  incremental crawl between that point and the requested as-of date.
/// </summary>
/// <param name="tableName">Name of the source to read</param>
/// <param name="context">Database context; NewestDependency is set to the newest component incorporated</param>
/// <returns>Table built by ConcatenatedTable.Build from the parts found, or null if the source has no versions at all</returns>
/// <exception cref="UsageException">No usable built table and no full crawl exist before the cutoff</exception>
public IXTable ReadSource(string tableName, XDatabaseContext context)
{
    List<IXTable> sources = new List<IXTable>();

    // Find the latest source of this type
    ItemVersions sourceVersions = context.StreamProvider.ItemVersions(LocationType.Source, tableName);
    ItemVersion latestFullSource = sourceVersions.LatestBeforeCutoff(CrawlType.Full, context.RequestedAsOfDateTime);

    // If there are no sources, there's nothing to rebuild from
    if (sourceVersions.Versions == null || sourceVersions.Versions.Count == 0)
    {
        return null;
    }

    // Find the latest already converted table
    ItemVersions tableVersions = context.StreamProvider.ItemVersions(LocationType.Table, tableName);
    ItemVersion latestBuiltTable = tableVersions.LatestBeforeCutoff(CrawlType.Full, context.RequestedAsOfDateTime);

    // If no source or table was found, throw
    if (latestFullSource == null && latestBuiltTable == null)
    {
        throw new UsageException(tableName, "[Table]", context.StreamProvider.SourceNames());
    }

    // Read the Table or the Full Crawl Source, whichever is newer
    DateTime incrementalNeededAfterCutoff;

    if (latestBuiltTable != null
        && (latestFullSource == null || !IsOutOfDate(latestBuiltTable.AsOfDate, latestFullSource.AsOfDate))
        && TableMetadataSerializer.UncachedExists(context.StreamProvider, latestBuiltTable.Path))
    {
        // If the table is current, reuse it
        sources.Add(BinaryTableReader.Build(context.StreamProvider, latestBuiltTable.Path));
        incrementalNeededAfterCutoff = latestBuiltTable.AsOfDate;
    }
    else
    {
        // A table version can be listed without being usable (its metadata is missing)
        // while no full crawl exists before the cutoff. There is then nothing to build
        // from; report that as a usage error rather than dereferencing null below.
        if (latestFullSource == null)
        {
            throw new UsageException(tableName, "[Table]", context.StreamProvider.SourceNames());
        }

        // Otherwise, build a new table from the latest source full crawl
        sources.AddRange(
            context.StreamProvider
                .Enumerate(latestFullSource.Path, EnumerateTypes.File, true)
                .Select((sa) => new TabularFileReader(context.StreamProvider, sa.Path)));
        incrementalNeededAfterCutoff = latestFullSource.AsOfDate;
    }

    // Add incremental crawls between the full source and the reporting date
    DateTime latestComponent = incrementalNeededAfterCutoff;

    foreach (ItemVersion incrementalCrawl in sourceVersions.VersionsInRange(CrawlType.Inc, incrementalNeededAfterCutoff, context.RequestedAsOfDateTime))
    {
        sources.AddRange(
            context.StreamProvider
                .Enumerate(incrementalCrawl.Path, EnumerateTypes.File, true)
                .Select((sa) => new TabularFileReader(context.StreamProvider, sa.Path)));
        latestComponent = latestComponent.BiggestOf(incrementalCrawl.AsOfDate);
    }

    // Report the latest incorporated source back
    context.NewestDependency = latestComponent;

    // Return the source (if a single) or concatenated group (if multiple parts)
    return ConcatenatedTable.Build(sources);
}
/// <summary>
///  Run a query and write the (optionally limited) result to the response
///  in the requested format.
/// </summary>
/// <param name="query">Query text to run; nothing is written if no pipeline results from it</param>
/// <param name="format">Output format passed to WriterForFormat</param>
/// <param name="rowCountLimit">Row limit; a limit is applied when this is zero or more</param>
/// <param name="colCountLimit">Column limit; a limit is applied when this is above zero</param>
/// <param name="asOfDate">Moment in time to run the query for</param>
/// <param name="response">Response to write the result to</param>
private void Run(string query, string format, int rowCountLimit, int colCountLimit, DateTime asOfDate, IHttpResponse response)
{
    IXTable pipeline = null;

    try
    {
        // Use the shared context unless another moment in time was requested
        XDatabaseContext context = (asOfDate == _xDatabaseContext.RequestedAsOfDateTime)
            ? _xDatabaseContext
            : new XDatabaseContext(_xDatabaseContext) { RequestedAsOfDateTime = asOfDate };

        // No query means no pipeline; return an empty result
        pipeline = context.Query(query);
        if (pipeline == null)
        {
            return;
        }

        // Wrap in a limit if either limit was requested
        bool limitRequested = (rowCountLimit >= 0 || colCountLimit > 0);
        if (limitRequested)
        {
            pipeline = new Verbs.Limit(pipeline, rowCountLimit, colCountLimit);
        }

        // Write in the desired format; disposal happens in the finally block
        pipeline = new TabularFileWriter(pipeline, WriterForFormat(format, response));
        pipeline.RunWithoutDispose();
    }
    catch (ColumnDataNotFoundException ex)
    {
        // If column data is missing, delete the table to try to spur re-creating it
        // NOTE: This logic will likely need to be updated when columns are downloaded remotely; multi-threaded scenarios will be complex.
        string tablePath = Path.Combine(ex.ColumnPath, @"..\..\..");
        TableMetadataSerializer.Delete(_xDatabaseContext.StreamProvider, tablePath);
    }
    finally
    {
        if (pipeline != null)
        {
            pipeline.Dispose();
        }
    }
}
/// <summary>
///  Create the HTTP service: a query suggester plus a background web server
///  with one responder registered per supported endpoint name.
/// </summary>
/// <param name="xDatabaseContext">Database context used to serve requests</param>
public HttpService(XDatabaseContext xDatabaseContext)
{
    _xDatabaseContext = xDatabaseContext;
    _suggester = new QuerySuggester(xDatabaseContext);

    // NOTE(review): arguments are presumably port, default document and web root folder - confirm against BackgroundWebServer
    _server = new BackgroundWebServer(5073, "index.html", "Web");

    // Query authoring and execution
    _server.AddResponder("suggest", Suggest);
    _server.AddResponder("run", Run);
    _server.AddResponder("download", Download);
    _server.AddResponder("count", CountWithinTimeout);

    // Remaining endpoints
    _server.AddResponder("save", Save);
    _server.AddResponder("test", Test);
}
/// <summary>
///  Run a query until it completes or the timeout expires, then write a single
///  JSON row with the columns Count, IsComplete and RuntimeMs to the response.
/// </summary>
/// <param name="query">Query text to count rows for; nothing is written if no pipeline results from it</param>
/// <param name="timeout">Time allowed for the run; replaced with TimeSpan.MaxValue while a debugger is attached</param>
/// <param name="asOfDate">Moment in time to run the query for</param>
/// <param name="response">Response to write the result to</param>
private void CountWithinTimeout(string query, TimeSpan timeout, DateTime asOfDate, IHttpResponse response)
{
    IXTable pipeline = null;

    try
    {
        // Use the shared context unless another moment in time was requested
        XDatabaseContext context = (asOfDate == _xDatabaseContext.RequestedAsOfDateTime)
            ? _xDatabaseContext
            : new XDatabaseContext(_xDatabaseContext) { RequestedAsOfDateTime = asOfDate };

        // No query means no pipeline; return an empty result
        pipeline = context.Query(query);
        if (pipeline == null)
        {
            return;
        }

        // Don't time out while stepping through in a debugger
        TimeSpan effectiveTimeout = Debugger.IsAttached ? TimeSpan.MaxValue : timeout;
        RunResult result = pipeline.RunUntilTimeout(effectiveTimeout);

        using (ITabularWriter writer = WriterForFormat("json", response))
        {
            writer.SetColumns(new string[] { "Count", "IsComplete", "RuntimeMs" });

            writer.Write(result.RowCount);
            writer.Write(result.IsComplete);
            writer.Write((int)result.Elapsed.TotalMilliseconds);

            writer.NextRow();
        }
    }
    finally
    {
        if (pipeline != null)
        {
            pipeline.Dispose();
        }
    }
}
/// <summary>
///  Create a context which shares the services and request settings of another
///  context. Only the properties assigned below are copied; everything else
///  keeps the value set by the default constructor.
/// </summary>
/// <param name="copyFrom">Context to copy from; when null this behaves like the default constructor</param>
public XDatabaseContext(XDatabaseContext copyFrom) : this()
{
    if (copyFrom == null)
    {
        return;
    }

    // Shared services
    this.Runner = copyFrom.Runner;
    this.StreamProvider = copyFrom.StreamProvider;
    this.Logger = copyFrom.Logger;
    this.Parser = copyFrom.Parser;

    // What is currently being built
    this.CurrentTable = copyFrom.CurrentTable;
    this.CurrentQuery = copyFrom.CurrentQuery;

    // Request settings
    this.RequestedAsOfDateTime = copyFrom.RequestedAsOfDateTime;
    this.ForceSingleThreaded = copyFrom.ForceSingleThreaded;
}
/// <summary>
///  Read a query from a file, run it, and print the number of rows written.
/// </summary>
/// <param name="queryFilePath">Path of the file containing the query text</param>
/// <param name="context">Database context to run the query against</param>
/// <returns>Row count returned by running the query; zero if the file contained no query</returns>
private static long RunFileQuery(string queryFilePath, XDatabaseContext context)
{
    string query = File.ReadAllText(queryFilePath);
    long rowsWritten = 0;

    using (new TraceWatch(query))
    using (IXTable source = context.Query(query))
    {
        // Query returns null when there is no query (the HTTP handlers guard for this too);
        // report zero rows rather than failing on a null pipeline.
        if (source != null)
        {
            rowsWritten = source.RunWithoutDispose();
        }
    }

    Console.WriteLine($"Done. {rowsWritten:n0} rows written.");
    return rowsWritten;
}
/// <summary>
///  Create the comparison data: two arrays of 50 million ushorts each.
///  Values holds random numbers from 0 to 999 and Thresholds holds 50 everywhere.
/// </summary>
/// <param name="context">Database context stored for the comparisons</param>
public PerformanceComparisons(XDatabaseContext context)
{
    Context = context;

    Count = 50 * 1000 * 1000;
    Values = new ushort[Count];
    Thresholds = new ushort[Count];

    // Unseeded, so the values differ from run to run
    Random r = new Random();

    int total = Values.Length;
    for (int index = 0; index < total; index++)
    {
        Values[index] = (ushort)r.Next(1000);
        Thresholds[index] = 50;
    }
}
/// <summary>
///  Run a query and write the (optionally limited) result to the response
///  in the requested format.
/// </summary>
/// <param name="query">Query text to run; nothing is written if no pipeline results from it</param>
/// <param name="format">Output format passed to WriterForFormat</param>
/// <param name="rowCountLimit">Row limit; a limit is applied when this is zero or more</param>
/// <param name="colCountLimit">Column limit; a limit is applied when this is above zero</param>
/// <param name="asOfDate">Moment in time to run the query for</param>
/// <param name="response">Response to write the result to</param>
private void Run(string query, string format, int rowCountLimit, int colCountLimit, DateTime asOfDate, IHttpResponse response)
{
    IXTable pipeline = null;

    try
    {
        XDatabaseContext context = _xDatabaseContext;

        // Build for another moment in time if requested
        if (asOfDate != _xDatabaseContext.RequestedAsOfDateTime)
        {
            context = new XDatabaseContext(_xDatabaseContext) { RequestedAsOfDateTime = asOfDate };
        }

        // Build a Pipeline for the Query
        pipeline = context.Query(query);

        // If there was no query, return an empty result instead of wrapping a null pipeline
        if (pipeline == null)
        {
            return;
        }

        // Restrict the row and column count if requested
        if (rowCountLimit >= 0 || colCountLimit > 0)
        {
            pipeline = new Verbs.Limit(pipeline, rowCountLimit, colCountLimit);
        }

        // Build a writer for the desired format
        using (ITabularWriter writer = WriterForFormat(format, response))
        {
            // Run the query and return the output
            pipeline = new TabularFileWriter(pipeline, writer);
            pipeline.RunAndDispose();

            // RunAndDispose is relied on elsewhere in this code as the only disposal of a
            // pipeline, so clear the local to keep the finally block from disposing twice.
            pipeline = null;
        }
    }
    finally
    {
        // Only reached with a non-null pipeline when something failed before the run completed
        if (pipeline != null)
        {
            pipeline.Dispose();
            pipeline = null;
        }
    }
}
/// <summary>
///  Generate builds a sample table with a huge number of tiny rows for scale testing.
/// </summary>
/// <remarks>
///  [Segment]      - ushort counting up from 0 to 65,534, then repeating
///  [Status]       - one of five string values, cycling in order
///  [WasEncrypted] - true for one row in every three
///
///  The original notes estimate 4b per row in binary format and ~20b as a CSV row,
///  so 5B rows is ~20GB in binary format and a ~100GB CSV.
///
///  Sets the static BinaryTableWriter.ColumnFileSizeLimit to 1 GB as a side effect.
/// </remarks>
/// <param name="rowCount">Number of rows to generate</param>
/// <param name="context">Database context whose StreamProvider receives the "HugeSample" table</param>
public static void Generate(long rowCount, XDatabaseContext context)
{
    BinaryTableWriter.ColumnFileSizeLimit = 1 * 1024 * 1024 * 1024;

    // Convert the status names to String8 once, up front
    String8Block block = new String8Block();
    String8[] statuses = new[] { "New", "Active", "Completed", "In Progress", "Blocked" }
        .Select((value) => block.GetCopy(value))
        .ToArray();

    // The table is written as a full crawl dated today (UTC)
    string tablePath = context.StreamProvider.Path(LocationType.Table, "HugeSample", CrawlType.Full, DateTime.UtcNow.Date);

    using (new TraceWatch($"Generating HugeSample \r\n with {rowCount:n0} rows\r\n to {context.StreamProvider.Description}\\{tablePath}..."))
    {
        // One pattern array per column; each has ushort.MaxValue entries
        int patternLength = ushort.MaxValue;

        ushort[] segments = Enumerable.Range(0, patternLength).Select((i) => (ushort)i).ToArray();
        String8[] statusPattern = Enumerable.Range(0, patternLength).Select((i) => statuses[i % statuses.Length]).ToArray();
        bool[] wasEncrypted = Enumerable.Range(0, patternLength).Select((i) => (i % 3) == 1).ToArray();

        IXTable table = new RepeatingArrayTable(rowCount, ushort.MaxValue)
            .WithColumn("Segment", segments)
            .WithColumn("Status", statusPattern)
            .WithColumn("WasEncrypted", wasEncrypted)
            .Query($@"write ""{tablePath}""", context);

        table.RunAndDispose();
    }
}
/// <summary>
///  Program entry point: build a database context rooted at the current
///  directory, apply the global switches, and hand the arguments to Run.
/// </summary>
/// <remarks>
///  "+cache" turns on ColumnCache; "-parallel" forces single threaded execution.
///  Both are matched case-insensitively and may appear anywhere in the arguments.
/// </remarks>
/// <param name="args">Command line arguments</param>
/// <returns>Exit code returned by Run</returns>
public static int Main(string[] args)
{
    XDatabaseContext context = new XDatabaseContext()
    {
        // No cutoff unless a later command sets one
        RequestedAsOfDateTime = DateTime.MaxValue,
        StreamProvider = new StreamProviderCache(new LocalFileStreamProvider(Environment.CurrentDirectory))
    };

    // The runner needs the context, so it is attached after construction
    context.Runner = new WorkflowRunner(context);

    foreach (string arg in args)
    {
        if (arg.Equals("+cache", StringComparison.OrdinalIgnoreCase))
        {
            ColumnCache.IsEnabled = true;
        }
        else if (arg.Equals("-parallel", StringComparison.OrdinalIgnoreCase))
        {
            context.ForceSingleThreaded = true;
        }
    }

    return Run(args, context);
}
/// <summary>
///  Benchmark the 'choose' verb: pick the row with the maximum [Rank] for each [ID]
///  across 900,000 rows (100,000 distinct IDs with nine rows each).
/// </summary>
public void Choose()
{
    XDatabaseContext context = new XDatabaseContext();

    // Rank assigned to each of the nine rows sharing an ID
    int[] rankPattern = new int[] { 2, 3, 1, 4, 6, 5, 7, 9, 8 };

    int distinctCount = 100000;
    int countPerID = 9;
    int rowCount = countPerID * distinctCount;

    // Build three arrays
    int[] ids = new int[rowCount];
    int[] ranks = new int[rowCount];
    int[] values = new int[rowCount];

    for (int group = 0; group < distinctCount; ++group)
    {
        for (int offset = 0; offset < countPerID; ++offset)
        {
            int row = group * countPerID + offset;

            ids[row] = group;                   // ID is the same for countPerID rows at a time
            ranks[row] = rankPattern[offset];   // Rank follows rankPattern, repeating for each ID
            values[row] = row;                  // Value is the index of the real row
        }
    }

    using (Benchmarker b = new Benchmarker($"Choose [{rowCount:n0}]", 3 * DefaultMeasureMilliseconds))
    {
        b.Measure("Choose", rowCount, () =>
        {
            IXTable actual = Context.FromArrays(rowCount)
                .WithColumn("ID", ids)
                .WithColumn("Rank", ranks)
                .WithColumn("Value", values)
                .Query("choose Max [Rank] [ID]", context);

            return actual.Count();
        });
    }
}
/// <summary>
///  Build the table through the inner runner with the deferred flag set, so the
///  inner runner is asked to defer computing dependencies.
/// </summary>
/// <param name="tableName">Name of the table to build</param>
/// <param name="context">Database context to build within</param>
/// <returns>Table returned by the inner runner</returns>
public IXTable Build(string tableName, XDatabaseContext context)
    => _inner.Build(tableName, context, true);
/// <summary>
///  Create a WorkflowRunner for the given database context with empty version caches.
/// </summary>
/// <param name="context">Database context this runner keeps a reference to</param>
public WorkflowRunner(XDatabaseContext context)
{
    this.XDatabaseContext = context;

    // Nothing is looked up here; both caches start out empty
    _currentTableVersions = new Cache<LatestTableForCutoff>();
    _versionCache = new Cache<ItemVersions>();
}
/// <summary>
///  Build a source by delegating to the Runner of this instance's own database
///  context; the context passed in is forwarded unchanged.
/// </summary>
/// <param name="sourceName">Name of the source to build</param>
/// <param name="context">Database context forwarded to the runner</param>
/// <returns>Table returned by the runner</returns>
public IXTable Build(string sourceName, XDatabaseContext context)
    => _xDatabaseContext.Runner.Build(sourceName, context);
/// <summary>
///  Build (or find already built) the named table as of the requested moment,
///  and return a table to read it from.
/// </summary>
/// <remarks>
///  Steps, in order: validate the name; widen a date-only cutoff to the end of that
///  day; reuse a previously found version if still valid; run a saved query if one
///  exists under that name; otherwise rebuild from the config query or the raw
///  source when the built table is missing or stale.
///
///  NOTE(review): the deferred return does not call Pop on the inner context, so the
///  outer context is not told about dependencies found so far - confirm intended.
///  NOTE(review): when no rebuild is needed, the builder is not disposed in this
///  method - confirm whether it holds resources.
/// </remarks>
/// <param name="tableName">Name of the table to build</param>
/// <param name="outerContext">Context of the caller; RequestedAsOfDateTime may be adjusted and build results are reported to it</param>
/// <param name="deferred">True to return the builder pipeline instead of running it when a rebuild is needed</param>
/// <returns>Reader for the built table, or a pipeline which produces it</returns>
/// <exception cref="UsageException">The table name is unknown, or there is neither a built table nor a way to build one</exception>
public IXTable Build(string tableName, XDatabaseContext outerContext, bool deferred)
{
    // Unknown names get one live check before being rejected
    if (!Sources.Contains(tableName))
    {
        if (!XDatabaseContext.StreamProvider.ContainsTable(tableName))
        {
            throw new UsageException(tableName, "Table", Sources);
        }

        // It exists after all, so the cached source list is stale
        UpdateSources();
    }

    // A cutoff with no time of day means "anything from that day": move it to the day's last second
    if (outerContext.RequestedAsOfDateTime.TimeOfDay == TimeSpan.Zero)
    {
        outerContext.RequestedAsOfDateTime = outerContext.RequestedAsOfDateTime.AddDays(1).AddSeconds(-1);
    }

    // Reuse the version found last time if it still covers this cutoff and is still on disk
    LatestTableForCutoff cached;
    bool cacheHit =
        _currentTableVersions.TryGet(tableName, out cached)
        && cached.Cutoff >= outerContext.RequestedAsOfDateTime
        && cached.TableVersion.AsOfDate <= outerContext.RequestedAsOfDateTime
        && TableMetadataSerializer.UncachedExists(outerContext.StreamProvider, cached.TableVersion.Path);

    if (cacheHit)
    {
        outerContext.NewestDependency = cached.TableVersion.AsOfDate;
        return BinaryTableReader.Build(outerContext.StreamProvider, cached.TableVersion.Path);
    }

    // Track this build in its own context
    XDatabaseContext innerContext = XDatabaseContext.Push(outerContext);
    innerContext.CurrentTable = tableName;

    // Saved queries are never cached as tables; just return a pipeline for them
    string queryPath = innerContext.StreamProvider.Path(LocationType.Query, tableName, ".xql");
    StreamAttributes queryFile = innerContext.StreamProvider.Attributes(queryPath);
    if (queryFile.Exists)
    {
        IXTable queryPipeline = innerContext.Query(innerContext.StreamProvider.ReadAllText(queryFile.Path));
        innerContext.Pop(outerContext);
        return queryPipeline;
    }

    // Find the newest built copy before the cutoff, and the query which produced it
    ItemVersions builtVersions = innerContext.StreamProvider.ItemVersions(LocationType.Table, tableName);
    ItemVersion latestTable = builtVersions.LatestBeforeCutoff(CrawlType.Full, outerContext.RequestedAsOfDateTime);

    string latestTableQuery = null;
    if (latestTable != null)
    {
        latestTableQuery = TableMetadataSerializer.Read(outerContext.StreamProvider, latestTable.Path).Query;
    }

    // Dependency tracking starts from the built copy (if any)
    if (latestTable == null)
    {
        innerContext.NewestDependency = DateTime.MinValue;
    }
    else
    {
        innerContext.NewestDependency = latestTable.AsOfDate;
    }

    // Work out how the table is built: from its config query, or by reading the raw source
    string xql;
    IXTable builder;

    string configPath = innerContext.StreamProvider.Path(LocationType.Config, tableName, ".xql");
    StreamAttributes configFile = innerContext.StreamProvider.Attributes(configPath);
    if (configFile.Exists)
    {
        // The config query is the recipe; building the pipeline builds dependencies recursively
        xql = innerContext.StreamProvider.ReadAllText(configFile.Path);
        builder = innerContext.Query(xql);
    }
    else
    {
        // A simple source is built just by reading all of its pieces
        xql = $"read {XqlScanner.Escape(tableName, TokenType.Value)}";
        builder = ReadSource(tableName, innerContext);
    }

    // Neither a built copy nor a way to make one
    if (latestTable == null && builder == null)
    {
        throw new UsageException(tableName, "Table", innerContext.StreamProvider.Tables());
    }

    // The path being read or written is dated by the newest dependency found
    string tablePath = innerContext.StreamProvider.Path(LocationType.Table, tableName, CrawlType.Full, innerContext.NewestDependency);

    if (builder != null)
    {
        // Rebuild when missing, when a dependency was rebuilt, when the query changed,
        // when out-of-date, or when the built copy was deleted
        bool mustRebuild =
            latestTable == null
            || innerContext.RebuiltSomething
            || (latestTableQuery != null && xql != latestTableQuery)
            || IsOutOfDate(latestTable.AsOfDate, innerContext.NewestDependency)
            || !TableMetadataSerializer.UncachedExists(outerContext.StreamProvider, latestTable.Path);

        if (mustRebuild)
        {
            // Deferred callers only want to know how to build it
            if (deferred)
            {
                return builder;
            }

            // Build now; the reader returned below reads the output
            innerContext.CurrentQuery = xql;
            Trace.WriteLine($"COMPUTE: [{innerContext.NewestDependency.ToString(StreamProviderExtensions.DateTimeFolderFormat)}] {tableName}");
            BinaryTableWriter.Build(builder, innerContext, tablePath).RunAndDispose();
            innerContext.RebuiltSomething = true;
        }
    }

    // Report the results upward and remember this version for later requests
    innerContext.Pop(outerContext);

    ItemVersion currentVersion = new ItemVersion(LocationType.Table, tableName, CrawlType.Full, innerContext.NewestDependency);
    _currentTableVersions.Add(tableName, new LatestTableForCutoff(outerContext.RequestedAsOfDateTime, currentVersion));

    return BinaryTableReader.Build(innerContext.StreamProvider, tablePath);
}
/// <summary>
///  Build the named table immediately; equivalent to the three-argument
///  overload with deferred set to false.
/// </summary>
/// <param name="tableName">Name of the table to build</param>
/// <param name="outerContext">Context of the caller</param>
/// <returns>Table returned by the three-argument overload</returns>
public IXTable Build(string tableName, XDatabaseContext outerContext)
    => Build(tableName, outerContext, false);
/// <summary>
///  Dispatch a command line to the matching XForm mode. With no arguments the
///  interactive runner is started; otherwise the first argument selects the mode
///  (run, add, clean, build, http/web, perf).
/// </summary>
/// <param name="args">Command line arguments; args[0] is the mode (case-insensitive)</param>
/// <param name="context">Database context to run against</param>
/// <returns>
///  The interactive or 'run' result cast to int, 0 for the other modes,
///  -2 after a UsageException and -1 after any other exception
///  (exceptions are not caught while a debugger is attached).
/// </returns>
public static int Run(string[] args, XDatabaseContext context)
{
    try
    {
        // Enable native acceleration by default
        NativeAccelerator.Enable();

        bool hasArguments = (args != null && args.Length > 0);
        if (!hasArguments)
        {
            return (int)new InteractiveRunner(context).Run();
        }

        string mode = args[0].ToLowerInvariant();
        switch (mode)
        {
            case "run":
                if (args.Length < 2)
                {
                    throw new UsageException("'run' [QueryFilePath]");
                }

                return (int)RunFileQuery(args[1], context);

            case "add":
                if (args.Length < 3)
                {
                    throw new UsageException("'add' [SourceFileOrDirectory] [AsSourceName] [Full|Incremental?] [AsOfDateTimeUtc?]");
                }

                context.StreamProvider.Add(
                    args[1],
                    args[2],
                    ParseCrawlTypeOrDefault(args, 3, CrawlType.Full),
                    ParseDateTimeOrDefault(args, 4, DateTime.MinValue));

                Console.WriteLine($"Done. \"{args[1]}\" added as Source \"{args[2]}\".");
                return 0;

            case "clean":
                Console.WriteLine("Cleaning Production folder...");

                context.StreamProvider.Clean(
                    ParseBooleanOrDefault(args, 1, true),
                    ParseDateTimeOrDefault(args, 2, default(DateTime)));

                Console.WriteLine("Done. Clean pass complete.");
                return 0;

            case "build":
                if (args.Length < 2)
                {
                    throw new UsageException($"'build' [Table] [OutputFormat?] [AsOfDateTimeUtc?]", context.Runner.SourceNames);
                }

                // An explicit as-of date replaces the context's cutoff; the format defaults to "xform"
                context.RequestedAsOfDateTime = ParseDateTimeOrDefault(args, 3, context.RequestedAsOfDateTime);
                ReportWriter.Build(args[1], context, (args.Length > 2 ? args[2] : "xform"));
                return 0;

            case "http":
            case "web":
                new HttpService(context).Run();
                return 0;

            case "perf":
                new PerformanceComparisons(context).Run();
                return 0;

            default:
                throw new UsageException($"Unknown XForm mode '{mode}'.");
        }
    }
    catch (UsageException ex) when (!Debugger.IsAttached)
    {
        // Usage problems print only the message
        Console.WriteLine(ex.Message);
        return -2;
    }
    catch (Exception ex) when (!Debugger.IsAttached)
    {
        // Anything else prints the full exception
        Console.WriteLine($"Error: {ex.ToString()}");
        return -1;
    }
}
/// <summary>
///  Create a new context copied from an outer context via the copy constructor.
/// </summary>
/// <param name="outer">Context to copy from</param>
/// <returns>The new context</returns>
public static XDatabaseContext Push(XDatabaseContext outer)
    => new XDatabaseContext(outer);