Code example #1
0
        /// <summary>
        /// Parses build-artifact JSON files from the input directory, linearizes them, runs the
        /// content placement classifier on each, and stores shared-placement results in a
        /// RocksDB-backed prediction store (snapshotted to &lt;OutputDirectory&gt;/Snap at the end).
        /// </summary>
        /// <param name="arguments">Parsed command-line arguments; Input/Output directories must exist.</param>
        private static void ClassifyInstances(Args arguments)
        {
            // Fail fast with clear messages before doing any work.
            Contract.Requires(arguments.OutputDirectory != null, "You must specify an output directory");
            Contract.Requires(arguments.InputDirectory != null, "You must specify an input directory");
            Contract.Requires(Directory.Exists(arguments.OutputDirectory), "The output directory must exist");
            Contract.Requires(Directory.Exists(arguments.InputDirectory), "The input directory must exist");
            var sharedCount    = 0;
            var nonSharedCount = 0;

            try
            {
                // Load the classifier and initialize the prediction store.
                s_logger.Info("Loading classifier...");
                s_logger.Info($"Initializing store at [{arguments.OutputDirectory}]");
                var store     = new RocksDbContentPlacementPredictionStore(arguments.OutputDirectory, true);
                var opContext = new OperationContext(new Context(new LogWrapper(s_logger)));
                // This entry point is synchronous, so we block on startup. GetAwaiter().GetResult()
                // unwraps any startup exception instead of surfacing an AggregateException the way
                // Wait()/.Result would.
                var initialized = store.StartupAsync(opContext).GetAwaiter().GetResult();
                if (!initialized)
                {
                    s_logger.Error($"Could not initialize RocksDbContentPlacementPredictionStore at [{arguments.OutputDirectory}]");
                    // FIX: previously execution continued after a failed startup, posting work
                    // against an unusable store. Bail out instead.
                    return;
                }
                var classifier = new ContentPlacementClassifier(arguments.AppConfig.ClassifierConfiguration);

                // Pipeline stage 1: parse each input file into (hash -> artifacts) pairs, in parallel.
                var buildArtifactParsingBlock = new TransformManyBlock<ParseBuildArtifactsInput, KeyValuePair<string, IReadOnlyList<ArtifactWithBuildMeta>>>(
                    i =>
                    {
                        var action = new ParseBuildArtifacts();
                        var result = action.PerformAction(i);
                        if (result.ExecutionStatus)
                        {
                            return result.Result.ArtifactsByHash.ToList();
                        }

                        // Log with context before faulting the block; the exception propagates
                        // through the pipeline via PropagateCompletion.
                        s_logger.Error(result.Exception, $"Error when parsing [{i.BuildArtifactsFile}]");
                        throw result.Exception;
                    },
                    new ExecutionDataflowBlockOptions()
                    {
                        MaxDegreeOfParallelism = arguments.AppConfig.ConcurrencyConfig.MaxBuildParsingTasks
                    });

                // Pipeline stage 2: linearize each artifact group.
                var linearizeBlock = new TransformBlock<KeyValuePair<string, IReadOnlyList<ArtifactWithBuildMeta>>, TimedActionResult<LinearizeArtifactsOutput>>(
                    i =>
                    {
                        var action = new LinearizeArtifacts();
                        return action.PerformAction(new LinearizeArtifactsInput(i.Key, i.Value));
                    },
                    new ExecutionDataflowBlockOptions()
                    {
                        MaxDegreeOfParallelism = arguments.AppConfig.ConcurrencyConfig.MaxArtifactLinearizationTasks
                    });

                // Pipeline stage 3: classify each linearized artifact and store shared results.
                var classifyBlock = new ActionBlock<TimedActionResult<LinearizeArtifactsOutput>>(
                    i =>
                    {
                        if (i.ExecutionStatus)
                        {
                            var cpInstance = new ContentPlacementInstance()
                            {
                                Artifact  = i.Result.Linear.AsInstance(),  // using the default utility method
                                QueueName = i.Result.Linear.Queues.First() // the first here is enough, since its always one!
                            };
                            var result = classifier.Classify(cpInstance);
                            if (result.Succeeded)
                            {
                                var selectedMachines = result.Value;
                                foreach (var path in i.Result.Linear.ReportedPaths)
                                {
                                    store.StoreResult(opContext, path, selectedMachines);
                                    // NOTE(review): sharedCount counts stored *paths* while
                                    // nonSharedCount counts *artifacts* — confirm that asymmetry
                                    // is intentional for the stats line below.
                                    Interlocked.Increment(ref sharedCount);
                                }
                            }
                            else
                            {
                                Interlocked.Increment(ref nonSharedCount);
                            }
                        }
                    },
                    new ExecutionDataflowBlockOptions()
                    {
                        MaxDegreeOfParallelism = arguments.AppConfig.ConcurrencyConfig.MaxArtifactClassificationTasks
                    });

                // Link the stages; completion flows downstream so waiting on the last block
                // is sufficient.
                var numParsingTasks = 0;
                buildArtifactParsingBlock.LinkTo(linearizeBlock, new DataflowLinkOptions {
                    PropagateCompletion = true
                });
                linearizeBlock.LinkTo(classifyBlock, new DataflowLinkOptions {
                    PropagateCompletion = true
                });
                // Feed every JSON file in the input directory into the head of the pipeline.
                foreach (var file in Directory.EnumerateFiles(arguments.InputDirectory, "*.json"))
                {
                    buildArtifactParsingBlock.Post(new ParseBuildArtifactsInput(file));
                    ++numParsingTasks;
                }
                s_logger.Info($"Posted {numParsingTasks} parsing tasks, processing");
                // Signal no more input and drain the pipeline (GetAwaiter().GetResult()
                // unwraps pipeline faults instead of throwing AggregateException).
                buildArtifactParsingBlock.Complete();
                classifyBlock.Completion.GetAwaiter().GetResult();
                // Snapshot the store so results can be shipped/consumed elsewhere.
                var snapshotDir = Path.Combine(arguments.OutputDirectory, "Snap");
                Directory.CreateDirectory(snapshotDir);
                s_logger.Info($"Done, snapshoting to [{snapshotDir}]");
                // NOTE(review): snapshot result is not checked — consider verifying success here.
                var result = store.CreateSnapshot(opContext, snapshotDir);
            }
            finally
            {
                var total = 1.0 * (sharedCount + nonSharedCount);
                // FIX: guard against 0/0 -> NaN when no artifacts were processed.
                var percentage = total > 0 ? (1.0 * sharedCount) / total : 0.0;
                s_logger.Info($"Stats: shared={sharedCount} ({percentage}), nonShared={nonSharedCount}, total={total}");
            }
        }
Code example #2
0
File: DataConsolidator.cs  Project: uilit/BuildXL
        /// <summary>
        /// Smoke-tests a trained content placement classifier: measures load time and approximate
        /// memory footprint, then classifies one randomly-generated instance per known queue and
        /// reports per-queue alternatives plus aggregate machine statistics.
        /// </summary>
        /// <param name="arguments">Parsed command-line arguments; InputDirectory must exist and
        /// contain classifier.json plus a QueueMap subdirectory.</param>
        private static void EvaluateContentPlacementClassifier(Args arguments)
        {
            Contract.Requires(arguments.InputDirectory != null, "You must specify an input directory");
            Contract.Requires(Directory.Exists(arguments.InputDirectory), "The input directory must exist");
            var configurationFile = $"{Path.Combine(arguments.InputDirectory, "classifier.json")}";

            s_logger.Info($"Evaluating classifier from [{configurationFile}]");
            // Approximate memory consumption of the loaded classifier and time the load.
            var initialMemory = GC.GetTotalMemory(true);
            var load          = Stopwatch.StartNew();
            var classifier    = new ContentPlacementClassifier(configurationFile);

            load.Stop();
            var consumedMemory = GC.GetTotalMemory(false) - initialMemory;

            s_logger.Info($"Classifier loaded in {load.ElapsedMilliseconds}ms, approxBytes={consumedMemory}");
            var numInstances = 0;
            var random       = new Random(Environment.TickCount);
            // One queue name per file in the QueueMap directory; one test instance per queue.
            var qNames         = new List <string>();
            var instances      = new Dictionary <ContentPlacementInstance, List <string> >();
            var uniqueMachines = new HashSet <string>();

            foreach (var qq in Directory.EnumerateFiles(Path.Combine(arguments.InputDirectory, "QueueMap")))
            {
                qNames.Add(Path.GetFileNameWithoutExtension(qq));
                ++numInstances;
            }
            // Classify one random instance per queue and time the whole batch.
            var ns       = 0; // instances classified as "not shared"
            var na       = 0; // queues with no placement alternatives
            var classify = Stopwatch.StartNew();

            foreach (var queueName in qNames)
            {
                var instance = new ContentPlacementInstance()
                {
                    QueueName = queueName,
                    // Random but plausibly-ranged feature values; ranges mirror the
                    // attribute semantics (sizes, pip counts, priorities, ...).
                    Artifact  = new RandomForestInstance()
                    {
                        Attributes = new Dictionary <string, double>()
                        {
                            ["SizeBytes"]                      = random.Next(0, 1000000000),
                            ["AvgInputPips"]                   = random.Next(0, 100000),
                            ["AvgOutputPips"]                  = random.Next(0, 100000),
                            ["AvgPositionForInputPips"]        = random.NextDouble(),
                            ["AvgPositionForOutputPips"]       = random.NextDouble(),
                            ["AvgDepsForInputPips"]            = random.Next(0, 10000),
                            ["AvgDepsForOutputPips"]           = random.Next(0, 10000),
                            ["AvgInputsForInputPips"]          = random.Next(0, 100000),
                            ["AvgInputsForOutputPips"]         = random.Next(0, 100000),
                            ["AvgOutputsForInputPips"]         = random.Next(0, 100000),
                            ["AvgOutputsForOutputPips"]        = random.Next(0, 100000),
                            ["AvgPriorityForInputPips"]        = random.Next(0, 100),
                            ["AvgPriorityForOutputPips"]       = random.Next(0, 100),
                            ["AvgWeightForInputPips"]          = random.Next(0, 100),
                            ["AvgWeightForOutputPips"]         = random.Next(0, 100),
                            ["AvgTagCountForInputPips"]        = random.Next(0, 100),
                            ["AvgTagCountForOutputPips"]       = random.Next(0, 100),
                            ["AvgSemaphoreCountForInputPips"]  = random.Next(0, 100),
                            ["AvgSemaphoreCountForOutputPips"] = random.Next(0, 100)
                        }
                    }
                };

                var result = classifier.Classify(instance);

                if (result.Succeeded)
                {
                    instances.Add(instance, result.Value);
                }

                switch (result.ReturnCode)
                {
                case ContentPlacementClassifierResult.ResultCode.ArtifactNotShared:
                    ns++;
                    break;

                case ContentPlacementClassifierResult.ResultCode.NoAlternativesForQueue:
                    na++;
                    break;

                default:
                    break;
                }
            }
            classify.Stop();
            // FIX: guard against 0/0 -> NaN in the per-instance average when QueueMap is empty.
            var perInstanceAvg = numInstances > 0 ? (1.0 * classify.ElapsedMilliseconds) / (1.0 * numInstances) : 0.0;
            s_logger.Info($"Classifier ({numInstances} instances, {ns} not shared, {na} without alternatives) done in {classify.ElapsedMilliseconds}ms (perInstanceAvg={perInstanceAvg}ms)");
            foreach (var kvp in instances)
            {
                var instance         = kvp.Key;
                var predictedClasses = kvp.Value;
                var unique           = new HashSet <string>(predictedClasses).Count;
                var real             = predictedClasses.Count;
                // Highlight queues whose prediction list contains duplicate machines.
                if (unique != real)
                {
                    Console.ForegroundColor = ConsoleColor.Yellow;
                }
                s_logger.Info($"queue={instance.QueueName}, count={real}, uniqueCount={unique}, alternatives=[{string.Join(",", predictedClasses)}]");
                Console.ResetColor();
                // FIX: use the BCL HashSet<T>.UnionWith instead of the project AddRange extension.
                uniqueMachines.UnionWith(predictedClasses);
            }
            // Hoist AlternativesPerQueue() so it is evaluated once, not once per use.
            var alternativesPerQueue = classifier.AlternativesPerQueue();
            foreach (var qq in alternativesPerQueue)
            {
                uniqueMachines.UnionWith(qq.Value);
            }
            // FIX: guard against division by zero when there are no queues.
            var avgPerQueue = alternativesPerQueue.Count > 0 ? (1.0 * uniqueMachines.Count) / (1.0 * alternativesPerQueue.Count) : 0.0;
            s_logger.Info($"totalMachinesAvailable={uniqueMachines.Count}, avg={avgPerQueue} per queue");
        }