Esempio n. 1
0
        /// <summary>
        /// Parses every "*.json" file found in the input directory and hands each successfully
        /// parsed result to a <c>StoreBuildArtifacts</c> action rooted at the output directory.
        /// </summary>
        /// <param name="arguments">
        /// Parsed arguments; provides <c>InputDirectory</c>, <c>OutputDirectory</c> and the
        /// concurrency limits under <c>AppConfig.ConcurrencyConfig</c>.
        /// </param>
        /// <remarks>
        /// The work is organised as a two-stage dataflow pipeline (parse, then store). The call
        /// blocks until the storing stage has completed.
        /// </remarks>
        private static void CreateDatabase(Args arguments)
        {
            // Preconditions: both directories have to be supplied and present on disk.
            Contract.Requires(arguments.OutputDirectory != null, "You must specify an output directory");
            Contract.Requires(arguments.InputDirectory != null, "You must specify an input directory");
            Contract.Requires(Directory.Exists(arguments.OutputDirectory), "The output directory must exist");
            Contract.Requires(Directory.Exists(arguments.InputDirectory), "The input directory must exist");

            // Stage 1: parse the input files; several files may be parsed concurrently.
            var parsingOptions = new ExecutionDataflowBlockOptions
            {
                MaxDegreeOfParallelism = arguments.AppConfig.ConcurrencyConfig.MaxBuildParsingTasks
            };
            var parsingStage = new TransformBlock<ParseBuildArtifactsInput, TimedActionResult<ParseBuildArtifactsOutput>>(
                input => new ParseBuildArtifacts().PerformAction(input),
                parsingOptions);

            // Stage 2: persist whatever was parsed successfully into the central folder.
            var storingOptions = new ExecutionDataflowBlockOptions
            {
                MaxDegreeOfParallelism = arguments.AppConfig.ConcurrencyConfig.MaxArtifactStoreTasks
            };
            var storingStage = new ActionBlock<TimedActionResult<ParseBuildArtifactsOutput>>(
                parsed =>
                {
                    // Failed parses are skipped on purpose; per the original author's note the
                    // exception gets logged even when it is not logged here.
                    if (parsed.ExecutionStatus)
                    {
                        var storeAction = new StoreBuildArtifacts(arguments.OutputDirectory);
                        storeAction.PerformAction(parsed.Result);
                    }
                },
                storingOptions);

            // Wire the stages together so that completing the first one eventually completes the second.
            var linkOptions = new DataflowLinkOptions
            {
                PropagateCompletion = true
            };
            parsingStage.LinkTo(storingStage, linkOptions);

            // Feed the pipeline, one message per input file.
            var postedCount = 0;
            foreach (var jsonFile in Directory.EnumerateFiles(arguments.InputDirectory, "*.json"))
            {
                parsingStage.Post(new ParseBuildArtifactsInput(jsonFile));
                postedCount++;
            }

            s_logger.Info($"Posted {postedCount} parsing tasks, processing");

            // Signal that no more input is coming and block until the last stage has drained.
            parsingStage.Complete();
            storingStage.Completion.Wait();
        }
Esempio n. 2
0
        /// <summary>
        /// Parses every "*.json" file in the input directory, linearizes the artifacts found in
        /// them, classifies each linearized artifact and stores the classification result for every
        /// reported path in a <c>RocksDbContentPlacementPredictionStore</c> opened at the output
        /// directory. After the pipeline has drained, a snapshot of the store is requested in the
        /// "Snap" subdirectory of the output directory.
        /// </summary>
        /// <param name="arguments">
        /// Parsed arguments; provides <c>InputDirectory</c>, <c>OutputDirectory</c>, the classifier
        /// configuration and the concurrency limits under <c>AppConfig</c>.
        /// </param>
        /// <remarks>
        /// The call blocks until the classification stage has completed. Statistics are logged from
        /// a <c>finally</c> block, so they are emitted on failure as well. If the store cannot be
        /// initialized, an error is logged and the method returns without processing any input.
        /// </remarks>
        private static void ClassifyInstances(Args arguments)
        {
            // Preconditions: both directories have to be supplied and present on disk.
            Contract.Requires(arguments.OutputDirectory != null, "You must specify an output directory");
            Contract.Requires(arguments.InputDirectory != null, "You must specify an input directory");
            Contract.Requires(Directory.Exists(arguments.OutputDirectory), "The output directory must exist");
            Contract.Requires(Directory.Exists(arguments.InputDirectory), "The input directory must exist");

            // sharedCount counts stored paths (one increment per reported path of a successfully
            // classified artifact); nonSharedCount counts artifacts whose classification did not succeed.
            var sharedCount    = 0;
            var nonSharedCount = 0;

            try
            {
                // work on the content store
                s_logger.Info($"Initializing store at [{arguments.OutputDirectory}]");
                var store     = new RocksDbContentPlacementPredictionStore(arguments.OutputDirectory, true);
                var opContext = new OperationContext(new Context(new LogWrapper(s_logger)));

                // init it, blocking until start-up has finished
                var initialized = store.StartupAsync(opContext);
                initialized.Wait();
                if (!initialized.Result)
                {
                    // Nothing below can work against a store that failed to start, so stop here
                    // instead of running the whole pipeline against it.
                    s_logger.Error($"Could not initialize RocksDbContentPlacementPredictionStore at [{arguments.OutputDirectory}]");
                    return;
                }

                s_logger.Info("Loading classifier...");
                var classifier = new ContentPlacementClassifier(arguments.AppConfig.ClassifierConfiguration);

                // Stage 1: parse the input files (in parallel). Each file fans out into one message
                // per entry of ArtifactsByHash.
                var buildArtifactParsingBlock = new TransformManyBlock<ParseBuildArtifactsInput, KeyValuePair<string, IReadOnlyList<ArtifactWithBuildMeta>>>(
                    i =>
                    {
                        var action = new ParseBuildArtifacts();
                        var result = action.PerformAction(i);
                        if (result.ExecutionStatus)
                        {
                            return result.Result.ArtifactsByHash.ToList();
                        }

                        // A parse failure is fatal: throwing here faults this block, and because the
                        // links propagate completion the fault surfaces from the final Wait() below.
                        s_logger.Error(result.Exception, $"Error when parsing [{i.BuildArtifactsFile}]");
                        throw result.Exception;
                    },
                    new ExecutionDataflowBlockOptions()
                    {
                        MaxDegreeOfParallelism = arguments.AppConfig.ConcurrencyConfig.MaxBuildParsingTasks
                    });

                // Stage 2: linearize each (key, artifacts) pair.
                var linearizeBlock = new TransformBlock<KeyValuePair<string, IReadOnlyList<ArtifactWithBuildMeta>>, TimedActionResult<LinearizeArtifactsOutput>>(
                    i =>
                    {
                        var action = new LinearizeArtifacts();
                        return action.PerformAction(new LinearizeArtifactsInput(i.Key, i.Value));
                    },
                    new ExecutionDataflowBlockOptions()
                    {
                        MaxDegreeOfParallelism = arguments.AppConfig.ConcurrencyConfig.MaxArtifactLinearizationTasks
                    });

                // Stage 3: classify each linearized artifact and store the outcome per reported path.
                var classifyBlock = new ActionBlock<TimedActionResult<LinearizeArtifactsOutput>>(
                    i =>
                    {
                        // Failed linearizations are skipped and are not counted in the statistics.
                        if (i.ExecutionStatus)
                        {
                            var cpInstance = new ContentPlacementInstance()
                            {
                                Artifact  = i.Result.Linear.AsInstance(),  // using the default utility method
                                QueueName = i.Result.Linear.Queues.First() // the first here is enough, since its always one!
                            };
                            var result = classifier.Classify(cpInstance);
                            if (result.Succeeded)
                            {
                                var selectedMachines = result.Value;
                                foreach (var path in i.Result.Linear.ReportedPaths)
                                {
                                    store.StoreResult(opContext, path, selectedMachines);
                                    // This delegate may run on several threads at once, hence the atomic update.
                                    Interlocked.Increment(ref sharedCount);
                                }
                            }
                            else
                            {
                                Interlocked.Increment(ref nonSharedCount);
                            }
                        }
                    },
                    new ExecutionDataflowBlockOptions()
                    {
                        MaxDegreeOfParallelism = arguments.AppConfig.ConcurrencyConfig.MaxArtifactClassificationTasks
                    });

                // link them, so completing the first stage eventually completes the last one
                buildArtifactParsingBlock.LinkTo(linearizeBlock, new DataflowLinkOptions {
                    PropagateCompletion = true
                });
                linearizeBlock.LinkTo(classifyBlock, new DataflowLinkOptions {
                    PropagateCompletion = true
                });

                // now we can post to the initial queue, one message per input file
                var numParsingTasks = 0;
                foreach (var file in Directory.EnumerateFiles(arguments.InputDirectory, "*.json"))
                {
                    buildArtifactParsingBlock.Post(new ParseBuildArtifactsInput(file));
                    ++numParsingTasks;
                }
                s_logger.Info($"Posted {numParsingTasks} parsing tasks, processing");

                // signal end of input and block until the last stage has drained
                buildArtifactParsingBlock.Complete();
                classifyBlock.Completion.Wait();

                // and now we should snapshot
                var snapshotDir = Path.Combine(arguments.OutputDirectory, "Snap");
                Directory.CreateDirectory(snapshotDir);
                s_logger.Info($"Done, snapshoting to [{snapshotDir}]");
                // NOTE(review): the value returned by CreateSnapshot is ignored, as it was before;
                // if it reports success/failure it should probably be checked and logged - confirm.
                store.CreateSnapshot(opContext, snapshotDir);
            }
            finally
            {
                var total = sharedCount + nonSharedCount;
                // Guard against 0/0 (which would log "NaN") when nothing was classified,
                // e.g. an empty input directory or an early failure.
                var percentage = total > 0 ? (1.0 * sharedCount) / total : 0.0;
                s_logger.Info($"Stats: shared={sharedCount} ({percentage}), nonShared={nonSharedCount}, total={total}");
            }
        }