private Task InitializePredictionStoreAsync(OperationContext context)
{
    return context.PerformOperationAsync(
        Tracer,
        async () =>
        {
            var centralStorage = (ContentLocationStore as TransitioningContentLocationStore)?.LocalLocationStore?.CentralStorage;

            if (Settings.ContentPlacementPredictionsBlob != null)
            {
                var checkpointDirectory = Path.Combine(LocalCacheRootMachineLocation.Path, "PlacementPredictions");
                _predictionStore = new RocksDbContentPlacementPredictionStore(checkpointDirectory, clean: false);
                await _predictionStore.StartupAsync(context).ThrowIfFailure();

                // The name of the last ingested snapshot blob is persisted next to the store.
                // If it is missing or differs from the configured blob, re-download the snapshot.
                var fileName = Path.Combine(checkpointDirectory, PredictionBlobNameFile);
                if (!File.Exists(fileName) || File.ReadAllText(fileName) != Settings.ContentPlacementPredictionsBlob)
                {
                    // The directory may still contain stale store files, so delete it recursively
                    // (a non-recursive delete throws on a non-empty directory).
                    Directory.Delete(checkpointDirectory, recursive: true);
                    Directory.CreateDirectory(checkpointDirectory);

                    var zipFile = Path.Combine(checkpointDirectory, "snapshot.zip");
                    await centralStorage.TryGetFileAsync(context, Settings.ContentPlacementPredictionsBlob, new AbsolutePath(zipFile)).ThrowIfFailure();
                    _predictionStore.UncompressSnapshot(context, zipFile).ThrowIfFailure();
                }
            }

            return BoolResult.Success;
        });
}
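// For reference, the staleness check above reduces to a "compare marker, then rebuild"
// pattern: the identifier of the last ingested snapshot is persisted next to the cached
// data, and the cache is rebuilt only when the configured identifier changes. The
// following is a minimal, self-contained sketch of that pattern, not BuildXL code;
// SnapshotCacheSketch, RefreshSnapshotDirectory, and the marker file name are hypothetical.
using System.IO;

public static class SnapshotCacheSketch
{
    private const string MarkerFileName = "blobName.txt";

    // Returns true if the directory was rebuilt, false if the cached snapshot was current.
    public static bool RefreshSnapshotDirectory(string directory, string configuredBlobName)
    {
        var marker = Path.Combine(directory, MarkerFileName);
        if (File.Exists(marker) && File.ReadAllText(marker) == configuredBlobName)
        {
            // Cached snapshot matches the configured blob; nothing to do.
            return false;
        }

        // Stale or missing: wipe the directory, recreate it, and record the new identifier.
        if (Directory.Exists(directory))
        {
            Directory.Delete(directory, recursive: true);
        }
        Directory.CreateDirectory(directory);

        // ... download and uncompress the snapshot into 'directory' here ...

        File.WriteAllText(marker, configuredBlobName);
        return true;
    }
}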
private static void ClassifyInstances(Args arguments)
{
    // sanity checks on the arguments
    Contract.Requires(arguments.OutputDirectory != null, "You must specify an output directory");
    Contract.Requires(arguments.InputDirectory != null, "You must specify an input directory");
    Contract.Requires(Directory.Exists(arguments.OutputDirectory), "The output directory must exist");
    Contract.Requires(Directory.Exists(arguments.InputDirectory), "The input directory must exist");

    var sharedCount = 0;
    var nonSharedCount = 0;
    try
    {
        // initialize the store that will hold the classification results
        s_logger.Info($"Initializing store at [{arguments.OutputDirectory}]");
        var store = new RocksDbContentPlacementPredictionStore(arguments.OutputDirectory, clean: true);
        var opContext = new OperationContext(new Context(new LogWrapper(s_logger)));
        var initialized = store.StartupAsync(opContext);
        initialized.Wait();
        if (!initialized.Result)
        {
            s_logger.Error($"Could not initialize RocksDbContentPlacementPredictionStore at [{arguments.OutputDirectory}]");
            return;
        }

        // load the classifier
        s_logger.Info("Loading classifier...");
        var classifier = new ContentPlacementClassifier(arguments.AppConfig.ClassifierConfiguration);

        // Create the pipeline. The first stage parses the input files, which can be done in parallel.
        var buildArtifactParsingBlock = new TransformManyBlock<ParseBuildArtifactsInput, KeyValuePair<string, IReadOnlyList<ArtifactWithBuildMeta>>>(
            i =>
            {
                var action = new ParseBuildArtifacts();
                var result = action.PerformAction(i);
                if (result.ExecutionStatus)
                {
                    return result.Result.ArtifactsByHash.ToList();
                }

                s_logger.Error(result.Exception, $"Error when parsing [{i.BuildArtifactsFile}]");
                throw result.Exception;
            },
            new ExecutionDataflowBlockOptions()
            {
                MaxDegreeOfParallelism = arguments.AppConfig.ConcurrencyConfig.MaxBuildParsingTasks
            });

        // The second stage linearizes each parsed artifact.
        var linearizeBlock = new TransformBlock<KeyValuePair<string, IReadOnlyList<ArtifactWithBuildMeta>>, TimedActionResult<LinearizeArtifactsOutput>>(
            i =>
            {
                var action = new LinearizeArtifacts();
                return action.PerformAction(new LinearizeArtifactsInput(i.Key, i.Value));
            },
            new ExecutionDataflowBlockOptions()
            {
                MaxDegreeOfParallelism = arguments.AppConfig.ConcurrencyConfig.MaxArtifactLinearizationTasks
            });

        // The final stage classifies each linearized instance and stores the result.
        var classifyBlock = new ActionBlock<TimedActionResult<LinearizeArtifactsOutput>>(
            i =>
            {
                // at this point we have an ML instance
                if (i.ExecutionStatus)
                {
                    var cpInstance = new ContentPlacementInstance()
                    {
                        Artifact = i.Result.Linear.AsInstance(), // using the default utility method
                        QueueName = i.Result.Linear.Queues.First() // the first is enough, since there is always exactly one
                    };
                    var result = classifier.Classify(cpInstance);
                    if (result.Succeeded)
                    {
                        var selectedMachines = result.Value;
                        foreach (var path in i.Result.Linear.ReportedPaths)
                        {
                            store.StoreResult(opContext, path, selectedMachines);
                            Interlocked.Increment(ref sharedCount);
                        }
                    }
                    else
                    {
                        Interlocked.Increment(ref nonSharedCount);
                    }
                }
            },
            new ExecutionDataflowBlockOptions()
            {
                MaxDegreeOfParallelism = arguments.AppConfig.ConcurrencyConfig.MaxArtifactClassificationTasks
            });

        // link the blocks, propagating completion downstream
        buildArtifactParsingBlock.LinkTo(linearizeBlock, new DataflowLinkOptions { PropagateCompletion = true });
        linearizeBlock.LinkTo(classifyBlock, new DataflowLinkOptions { PropagateCompletion = true });

        // now we can post to the initial block
        var numParsingTasks = 0;
        foreach (var file in Directory.EnumerateFiles(arguments.InputDirectory, "*.json"))
        {
            buildArtifactParsingBlock.Post(new ParseBuildArtifactsInput(file));
            ++numParsingTasks;
        }
        s_logger.Info($"Posted {numParsingTasks} parsing tasks, processing");

        // signal completion and wait for the last stage to drain
        buildArtifactParsingBlock.Complete();
        classifyBlock.Completion.Wait();

        // and now we can snapshot the store
        var snapshotDir = Path.Combine(arguments.OutputDirectory, "Snap");
        Directory.CreateDirectory(snapshotDir);
        s_logger.Info($"Done, snapshotting to [{snapshotDir}]");
        store.CreateSnapshot(opContext, snapshotDir);
    }
    finally
    {
        var total = 1.0 * (sharedCount + nonSharedCount);
        var percentage = total > 0 ? sharedCount / total : 0.0;
        s_logger.Info($"Stats: shared={sharedCount} ({percentage}), nonShared={nonSharedCount}, total={total}");
    }
}
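// The three-stage wiring above (TransformManyBlock -> TransformBlock -> ActionBlock,
// linked with PropagateCompletion) is the core TPL Dataflow pattern: complete the head
// block, then wait on the tail block's Completion to drain the whole pipeline. Below is
// a minimal, self-contained sketch of the same wiring; the stage payloads and the
// PipelineSketch class are invented for illustration and are not part of this codebase.
using System;
using System.Linq;
using System.Threading.Tasks.Dataflow;

public static class PipelineSketch
{
    public static void Main()
    {
        // Stage 1: fan each input out into several items (mirrors buildArtifactParsingBlock).
        var parse = new TransformManyBlock<string, int>(
            s => s.Split(',').Select(int.Parse),
            new ExecutionDataflowBlockOptions { MaxDegreeOfParallelism = 4 });

        // Stage 2: one-to-one transformation (mirrors linearizeBlock).
        var transform = new TransformBlock<int, int>(
            n => n * n,
            new ExecutionDataflowBlockOptions { MaxDegreeOfParallelism = 4 });

        // Stage 3: terminal consumer (mirrors classifyBlock).
        var consume = new ActionBlock<int>(
            n => Console.WriteLine(n),
            new ExecutionDataflowBlockOptions { MaxDegreeOfParallelism = 2 });

        // PropagateCompletion makes Complete() on the head flow through the chain,
        // so waiting on the tail's Completion drains the entire pipeline.
        var link = new DataflowLinkOptions { PropagateCompletion = true };
        parse.LinkTo(transform, link);
        transform.LinkTo(consume, link);

        parse.Post("1,2,3");
        parse.Post("4,5");

        parse.Complete();
        consume.Completion.Wait();
    }
}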