Пример #1
0
        /// <summary>
        /// Evaluates the metric for the current stream and for the previous stream of a test
        /// and returns both results together.
        /// </summary>
        /// <param name="dataLakeClient">The data lake client, forwarded to <c>GetMetricValue</c>.</param>
        /// <param name="testContent">The test definition that supplies the current and previous stream paths.</param>
        /// <param name="sparkClient">The spark client, forwarded to <c>GetMetricValue</c>.</param>
        /// <param name="sparkClientSettings">The spark client settings, forwarded to <c>GetMetricValue</c>.</param>
        /// <param name="cancellationToken">The cancellation token, forwarded to <c>GetMetricValue</c>.</param>
        /// <returns>
        /// A <see cref="MetricValues"/> whose <c>Current</c> is the result for the current stream and
        /// whose <c>Baseline</c> is the result for the previous stream.
        /// </returns>
        private static MetricValues GetMetricValues(
            DataLakeClient dataLakeClient,
            SparkMetricsTestContent testContent,
            SparkClient sparkClient,
            SparkClientSettings sparkClientSettings,
            CancellationToken cancellationToken)
        {
            // Both paths are resolved up front, before any metric is evaluated.
            string currentStream = testContent.GetCurrentStreamPath();
            string baselineStream = testContent.GetPreviousStreamPath();

            // The current stream is evaluated first, then the baseline.
            var currentMetrics = GetMetricValue(
                testContent, sparkClient, sparkClientSettings, currentStream, dataLakeClient, cancellationToken);
            var baselineMetrics = GetMetricValue(
                testContent, sparkClient, sparkClientSettings, baselineStream, dataLakeClient, cancellationToken);

            var pair = new MetricValues();
            pair.Baseline = baselineMetrics;
            pair.Current = currentMetrics;
            return pair;
        }
Пример #2
0
        /// <summary>
        /// Runs the metrics correctness test described by <paramref name="testContent"/> and builds
        /// one result per configured threshold.
        /// </summary>
        /// <remarks>
        /// For the period-over-period comparison types the baseline is the previous stream.
        /// For <c>VarianceToTarget</c> the baseline is obtained by running the notebook with the
        /// <c>cmdText</c> notebook parameter temporarily replaced by <c>targetCmdText</c>; the original
        /// <c>cmdText</c> entry is restored afterwards so <paramref name="testContent"/> is left unchanged.
        /// </remarks>
        /// <param name="dataLakeClient">The data lake client.</param>
        /// <param name="testContent">The test definition (comparison type, streams, notebook parameters, thresholds).</param>
        /// <param name="sparkClient">The spark client.</param>
        /// <param name="sparkClientSettings">The spark client settings.</param>
        /// <param name="cancellationToken">The cancellation token, forwarded to the metric evaluations.</param>
        /// <returns>One <see cref="MetricsTestResult"/> for each entry of <c>testContent.Thresholds</c>.</returns>
        /// <exception cref="NotSupportedException">The comparison type is not handled here.</exception>
        /// <exception cref="KeyNotFoundException">
        /// The comparison type is <c>VarianceToTarget</c> but the notebook parameters do not contain <c>targetCmdText</c>.
        /// </exception>
        private static List<MetricsTestResult> ProcessMetricsCorrectnessTest(
            DataLakeClient dataLakeClient,
            SparkMetricsTestContent testContent,
            SparkClient sparkClient,
            SparkClientSettings sparkClientSettings,
            CancellationToken cancellationToken)
        {
            MetricValues metricValues;

            switch (testContent.ComparisonType)
            {
                case ComparisonType.DayOverDay:
                case ComparisonType.WeekOverWeek:
                case ComparisonType.MonthOverMonth:
                case ComparisonType.YearOverYear:
                    metricValues = GetMetricValues(
                        dataLakeClient,
                        testContent,
                        sparkClient,
                        sparkClientSettings,
                        cancellationToken);
                    break;

                case ComparisonType.VarianceToTarget:
                {
                    // Validate before starting any notebook run so a misconfigured test fails
                    // immediately with a meaningful message instead of after the first run.
                    var parameters = testContent.NotebookParameters;
                    string targetCmdText;
                    if (parameters == null || !parameters.TryGetValue("targetCmdText", out targetCmdText))
                    {
                        throw new KeyNotFoundException(
                            $"ComparisonType {testContent.ComparisonType} requires a 'targetCmdText' notebook parameter");
                    }

                    var metricValue = GetMetricValue(
                        testContent,
                        sparkClient,
                        sparkClientSettings,
                        testContent.GetCurrentStreamPath(),
                        dataLakeClient,
                        cancellationToken);

                    // GetMetricValue reads the query from testContent.NotebookParameters, so the target
                    // query has to be swapped in there. Remember the previous state so it can be put back.
                    string originalCmdText;
                    bool hadCmdText = parameters.TryGetValue("cmdText", out originalCmdText);
                    parameters["cmdText"] = targetCmdText;

                    IDictionary<string, double> targetValue;
                    try
                    {
                        targetValue = GetMetricValue(
                            testContent,
                            sparkClient,
                            sparkClientSettings,
                            testContent.TargetStreamPath != null
                                ? testContent.GetCurrentTargetStreamPath()
                                : testContent.GetCurrentStreamPath(),
                            dataLakeClient,
                            cancellationToken);
                    }
                    finally
                    {
                        // Undo the swap even when the target run throws, so a later run on the same
                        // testContent does not evaluate the "current" metric with the target query.
                        if (hadCmdText)
                        {
                            parameters["cmdText"] = originalCmdText;
                        }
                        else
                        {
                            parameters.Remove("cmdText");
                        }
                    }

                    metricValues = new MetricValues
                    {
                        Baseline = targetValue,
                        Current = metricValue
                    };
                    break;
                }

                default:
                    throw new NotSupportedException($"ComparisonType {testContent.ComparisonType} not supported for SparkWorker");
            }

            var results = new List<MetricsTestResult>();

            // Each threshold names one metric; its name is the lookup key into both value sets.
            foreach (var threshold in testContent.Thresholds)
            {
                var result = new MetricsTestResult
                {
                    ComparisonType = testContent.ComparisonType,
                    Date = testContent.Date,
                    BaselineMetricValue = metricValues.Baseline[threshold.Name],
                    MetricValue = metricValues.Current[threshold.Name],
                    LowerBoundThreshold = threshold.LowerBound,
                    UpperBoundThreshold = threshold.UpperBound,
                    PercentDiff = MetricValues.ComputePercentDiff(metricValues, threshold.Name),
                    PreviousDate = testContent.GetPreviousDate(),
                    TestName = testRunName,
                    TestRunId = testRunId,
                    MetricName = threshold.Name
                };

                results.Add(result);
            }

            return results;
        }
Пример #3
0
        /// <summary>
        /// Runs the test's notebook against a single stream and returns, for every column of the
        /// notebook output, the value of that column's first row.
        /// </summary>
        /// <param name="testContent">The test definition supplying the data lake store, notebook path and notebook parameters.</param>
        /// <param name="sparkClient">The spark client used to run the notebook.</param>
        /// <param name="sparkClientSettings">The spark client settings copied into the run request.</param>
        /// <param name="stream">The stream path; it must exist in the test's data lake store.</param>
        /// <param name="dataLakeClient">The data lake client used to check that <paramref name="stream"/> exists.</param>
        /// <param name="cancellationToken">The cancellation token, forwarded to the notebook run.</param>
        /// <returns>A dictionary keyed by the output's top-level keys, each mapped to its first row value.</returns>
        /// <exception cref="FileNotFoundException">The stream does not exist.</exception>
        /// <exception cref="InvalidOperationException">
        /// The notebook run failed, produced no output, or a column of the output contains no rows.
        /// </exception>
        private static IDictionary<string, double> GetMetricValue(
            SparkMetricsTestContent testContent,
            SparkClient sparkClient,
            SparkClientSettings sparkClientSettings,
            string stream,
            DataLakeClient dataLakeClient,
            CancellationToken cancellationToken)
        {
            var dataLakeStore = testContent.GetDatalakeStore();

            if (!dataLakeClient.CheckExists(dataLakeStore, stream))
            {
                throw new FileNotFoundException($"Stream does not exist : {stream}");
            }

            // Work on a copy: "streamPath" is added below, and writing it into the caller's
            // dictionary would leak a per-run value into testContent, which is reused across runs.
            var notebookParameters = testContent.NotebookParameters == null
                ? new Dictionary<string, string>()
                : new Dictionary<string, string>(testContent.NotebookParameters);

            var sparkRequest = new SparkClientRequest
            {
                NodeType = sparkClientSettings.NodeType,
                NumWorkersMin = sparkClientSettings.NumWorkersMin,
                NumWorkersMax = sparkClientSettings.NumWorkersMax,
                CostPerNode = GetCostPerNode(sparkClientSettings.NodeTypes, sparkClientSettings.NodeType),
                Libraries = sparkClientSettings.Libraries,
                NotebookPath = testContent.NotebookPath,
                NotebookParameters = notebookParameters,
                TestRunId = testRunId,
                TimeoutSeconds = sparkClientSettings.TimeoutSeconds
            };

            Console.WriteLine($"Running notebook={testContent.NotebookPath} for DataLakeStore={dataLakeStore}, Path={stream}");
            var mountPoint = GetMountPoint(dataLakeStore, stream);

            Console.WriteLine($"Running notebook={testContent.NotebookPath} for mountPoint={mountPoint}");

            // The notebook receives the mount point as its input path.
            sparkRequest.NotebookParameters["streamPath"] = mountPoint;

            Console.WriteLine($"Notebook parameters : {string.Join(", ", sparkRequest.NotebookParameters.Select(t => t.Key + "=" + t.Value))}");

            var response = sparkClient.RunNotebook(sparkRequest, cancellationToken);

            response.TestRunId = testRunId;

            if (!response.IsRunSuccess())
            {
                Console.WriteLine("Error getting metric.");
                Console.WriteLine(JObject.Parse(JsonConvert.SerializeObject(response)));
                throw new InvalidOperationException($"Error getting metric. TestRun = {testRunId}, Spark job {response.Run?.RunId} failed");
            }

            if (string.IsNullOrWhiteSpace(response.RunOutput))
            {
                throw new InvalidOperationException($"Notebook run succeeded but produced no output. TestRun = {testRunId}");
            }

            // The output is parsed as a two-level JSON object: { key: { rowKey: value, ... }, ... }.
            // For the format reference see:
            // https://pandas.pydata.org/pandas-docs/version/0.24.2/reference/api/pandas.DataFrame.to_json.html
            var resultDataFrame = JsonConvert.DeserializeObject<Dictionary<string, Dictionary<string, double>>>(response.RunOutput);
            if (resultDataFrame == null)
            {
                // A JSON "null" payload deserializes to null rather than throwing.
                throw new InvalidOperationException($"Notebook output does not contain a data frame. TestRun = {testRunId}");
            }

            var resultDictionary = new Dictionary<string, double>();
            foreach (var pair in resultDataFrame)
            {
                // pair.Key is the column name; pair.Value holds that column's rows.
                if (pair.Value == null || pair.Value.Count == 0)
                {
                    throw new InvalidOperationException("Result does not contain any rows");
                }

                // Only the first row of each column is used.
                resultDictionary.Add(pair.Key, pair.Value.First().Value);
            }

            return resultDictionary;
        }