Example #1
        private Task InitializePredictionStoreAsync(OperationContext context)
        {
            return context.PerformOperationAsync(
                Tracer,
                async () =>
                {
                    var centralStorage = (ContentLocationStore as TransitioningContentLocationStore)?.LocalLocationStore?.CentralStorage;

                    if (Settings.ContentPlacementPredictionsBlob != null)
                    {
                        var checkpointDirectory = Path.Combine(LocalCacheRootMachineLocation.Path, "PlacementPredictions");
                        _predictionStore = new RocksDbContentPlacementPredictionStore(checkpointDirectory, clean: false);
                        await _predictionStore.StartupAsync(context).ThrowIfFailure();

                        // re-download the snapshot only if the cached blob name differs from the configured one
                        var fileName = Path.Combine(checkpointDirectory, PredictionBlobNameFile);
                        if (!File.Exists(fileName) || File.ReadAllText(fileName) != Settings.ContentPlacementPredictionsBlob)
                        {
                            // the store has already created files here, so the delete must be recursive
                            Directory.Delete(checkpointDirectory, recursive: true);
                            Directory.CreateDirectory(checkpointDirectory);

                            var zipFile = Path.Combine(checkpointDirectory, "snapshot.zip");
                            await centralStorage.TryGetFileAsync(context, Settings.ContentPlacementPredictionsBlob, new AbsolutePath(zipFile)).ThrowIfFailure();
                            _predictionStore.UncompressSnapshot(context, zipFile).ThrowIfFailure();
                        }
                    }

                    return BoolResult.Success;
                });
        }
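
For reference, below is a minimal, self-contained sketch of the marker-file caching pattern the method above relies on: the snapshot directory is rebuilt and re-downloaded only when the recorded blob name no longer matches the configured one. Every name in this sketch (SnapshotCache, RefreshCacheIfStaleAsync, downloadAsync) is an illustrative assumption, not part of the real cache API.

using System;
using System.IO;
using System.Threading.Tasks;

public static class SnapshotCache
{
    // Rebuilds cacheDirectory and re-downloads the snapshot only when the
    // marker file does not record the currently configured blob name.
    public static async Task RefreshCacheIfStaleAsync(
        string cacheDirectory,
        string markerFileName,
        string configuredBlobName,
        Func<string, Task> downloadAsync) // hypothetical download callback
    {
        var markerPath = Path.Combine(cacheDirectory, markerFileName);

        // Up to date: the marker records the blob we already downloaded.
        if (File.Exists(markerPath) && File.ReadAllText(markerPath) == configuredBlobName)
        {
            return;
        }

        // Stale or first run: rebuild the directory, then fetch the snapshot.
        if (Directory.Exists(cacheDirectory))
        {
            Directory.Delete(cacheDirectory, recursive: true);
        }
        Directory.CreateDirectory(cacheDirectory);

        await downloadAsync(Path.Combine(cacheDirectory, "snapshot.zip"));

        // Record which blob this cache now corresponds to.
        File.WriteAllText(markerPath, configuredBlobName);
    }
}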
Example #2
        private static void ClassifyInstances(Args arguments)
        {
            // a couple of sanity checks first
            Contract.Requires(arguments.OutputDirectory != null, "You must specify an output directory");
            Contract.Requires(arguments.InputDirectory != null, "You must specify an input directory");
            Contract.Requires(Directory.Exists(arguments.OutputDirectory), "The output directory must exist");
            Contract.Requires(Directory.Exists(arguments.InputDirectory), "The input directory must exist");
            var sharedCount = 0;
            var nonSharedCount = 0;

            try
            {
                // load the classifier first
                s_logger.Info("Loading classifier...");
                // work on the content store
                s_logger.Info($"Initializing store at [{arguments.OutputDirectory}]");
                var store = new RocksDbContentPlacementPredictionStore(arguments.OutputDirectory, clean: true);
                var opContext = new OperationContext(new Context(new LogWrapper(s_logger)));
                // init it
                var initialized = store.StartupAsync(opContext);
                initialized.Wait();
                // and bail out on failure instead of continuing with an unusable store
                if (!initialized.Result)
                {
                    s_logger.Error($"Could not initialize RocksDbContentPlacementPredictionStore at [{arguments.OutputDirectory}]");
                    return;
                }
                var classifier = new ContentPlacementClassifier(arguments.AppConfig.ClassifierConfiguration);
                // create the pipeline. The first step here is to parse the input files, and we can do this in parallel
                var buildArtifactParsingBlock = new TransformManyBlock<ParseBuildArtifactsInput, KeyValuePair<string, IReadOnlyList<ArtifactWithBuildMeta>>>(
                    i =>
                    {
                        var action = new ParseBuildArtifacts();
                        var result = action.PerformAction(i);
                        if (result.ExecutionStatus)
                        {
                            return result.Result.ArtifactsByHash.ToList();
                        }

                        s_logger.Error(result.Exception, $"Error when parsing [{i.BuildArtifactsFile}]");
                        throw result.Exception;
                    },
                    new ExecutionDataflowBlockOptions
                    {
                        MaxDegreeOfParallelism = arguments.AppConfig.ConcurrencyConfig.MaxBuildParsingTasks
                    });
                // then, when we have one, we linearize it
                var linearizeBlock = new TransformBlock<KeyValuePair<string, IReadOnlyList<ArtifactWithBuildMeta>>, TimedActionResult<LinearizeArtifactsOutput>>(
                    i =>
                    {
                        var action = new LinearizeArtifacts();
                        return action.PerformAction(new LinearizeArtifactsInput(i.Key, i.Value));
                    },
                    new ExecutionDataflowBlockOptions
                    {
                        MaxDegreeOfParallelism = arguments.AppConfig.ConcurrencyConfig.MaxArtifactLinearizationTasks
                    });

                // and we classify them
                var classifyBlock = new ActionBlock<TimedActionResult<LinearizeArtifactsOutput>>(
                    i =>
                    {
                        // we have an ML instance here
                        if (i.ExecutionStatus)
                        {
                            var cpInstance = new ContentPlacementInstance()
                            {
                                Artifact = i.Result.Linear.AsInstance(),   // using the default utility method
                                QueueName = i.Result.Linear.Queues.First() // the first is enough, since there is always exactly one
                            };
                            var result = classifier.Classify(cpInstance);
                            if (result.Succeeded)
                            {
                                var selectedMachines = result.Value;
                                foreach (var path in i.Result.Linear.ReportedPaths)
                                {
                                    store.StoreResult(opContext, path, selectedMachines);
                                    Interlocked.Increment(ref sharedCount);
                                }
                            }
                            else
                            {
                                Interlocked.Increment(ref nonSharedCount);
                            }
                        }
                    },
                    new ExecutionDataflowBlockOptions
                    {
                        MaxDegreeOfParallelism = arguments.AppConfig.ConcurrencyConfig.MaxArtifactClassificationTasks
                    });
                // link them, propagating completion through the pipeline
                var numParsingTasks = 0;
                buildArtifactParsingBlock.LinkTo(linearizeBlock, new DataflowLinkOptions { PropagateCompletion = true });
                linearizeBlock.LinkTo(classifyBlock, new DataflowLinkOptions { PropagateCompletion = true });
                // so now we can post to the initial block
                foreach (var file in Directory.EnumerateFiles(arguments.InputDirectory, "*.json"))
                {
                    buildArtifactParsingBlock.Post(new ParseBuildArtifactsInput(file));
                    ++numParsingTasks;
                }
                s_logger.Info($"Posted {numParsingTasks} parsing tasks, processing");
                // signal completion, then wait for the last block to drain
                buildArtifactParsingBlock.Complete();
                classifyBlock.Completion.Wait();
                // and now we can snapshot the store
                var snapshotDir = Path.Combine(arguments.OutputDirectory, "Snap");
                Directory.CreateDirectory(snapshotDir);
                s_logger.Info($"Done, snapshotting to [{snapshotDir}]");
                store.CreateSnapshot(opContext, snapshotDir);
                // done
            }
            finally
            {
                var total = 1.0 * (sharedCount + nonSharedCount);
                // guard against division by zero when nothing was processed
                var percentage = total > 0 ? sharedCount / total : 0.0;
                s_logger.Info($"Stats: shared={sharedCount} ({percentage}), nonShared={nonSharedCount}, total={total}");
            }
        }
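
The second example is built around a three-stage TPL Dataflow graph. Below is a minimal, runnable sketch of the same pattern, a TransformManyBlock feeding a TransformBlock feeding an ActionBlock with PropagateCompletion, using placeholder string processing in place of the real parse/linearize/classify actions; only the pipeline wiring mirrors the example above. It requires the System.Threading.Tasks.Dataflow NuGet package.

using System;
using System.Threading.Tasks;
using System.Threading.Tasks.Dataflow;

public static class PipelineSketch
{
    public static async Task Main()
    {
        // Stage 1: split each line into words (one input, many outputs),
        // mirroring buildArtifactParsingBlock.
        var parse = new TransformManyBlock<string, string>(
            line => line.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries),
            new ExecutionDataflowBlockOptions { MaxDegreeOfParallelism = 4 });

        // Stage 2: normalize each word (one-to-one), mirroring linearizeBlock.
        var normalize = new TransformBlock<string, string>(
            word => word.ToLowerInvariant(),
            new ExecutionDataflowBlockOptions { MaxDegreeOfParallelism = 4 });

        // Stage 3: consume the results, mirroring classifyBlock.
        var consume = new ActionBlock<string>(word => Console.WriteLine(word));

        // PropagateCompletion makes Complete() on the first block flow downstream.
        var linkOptions = new DataflowLinkOptions { PropagateCompletion = true };
        parse.LinkTo(normalize, linkOptions);
        normalize.LinkTo(consume, linkOptions);

        parse.Post("Hello Dataflow World");
        parse.Complete();

        // Equivalent to classifyBlock.Completion.Wait() in the example above.
        await consume.Completion;
    }
}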