/// <summary>
        /// Gets the current and baseline metric values for a period-over-period comparison
        /// by running the test notebook against the current and previous stream paths.
        /// </summary>
        /// <param name="dataLakeClient">The data lake client.</param>
        /// <param name="testContent">Content of the test.</param>
        /// <param name="sparkClient">The spark client.</param>
        /// <param name="sparkClientSettings">The spark client settings.</param>
        /// <param name="cancellationToken">Token used to cancel the Spark notebook runs.</param>
        /// <returns>MetricValues with Current taken from the current stream and Baseline from the previous stream.</returns>
        private static MetricValues GetMetricValues(
            DataLakeClient dataLakeClient,
            SparkMetricsTestContent testContent,
            SparkClient sparkClient,
            SparkClientSettings sparkClientSettings,
            CancellationToken cancellationToken)
        {
            string currentPath  = testContent.GetCurrentStreamPath();
            string previousPath = testContent.GetPreviousStreamPath();

            var current = GetMetricValue(
                testContent,
                sparkClient,
                sparkClientSettings,
                currentPath,
                dataLakeClient,
                cancellationToken);
            var previous = GetMetricValue(
                testContent,
                sparkClient,
                sparkClientSettings,
                previousPath,
                dataLakeClient,
                cancellationToken);

            return new MetricValues
            {
                Baseline = previous,
                Current  = current
            };
        }
// ----- Example #2 -----
 /// <summary>
 /// Initializes a new instance of the Configuration class with optional client overrides.
 /// </summary>
 /// <param name="secretProvider">The secret provider. NOTE(review): accepted but never stored
 /// or used in this constructor — confirm whether it is consumed elsewhere or can be removed.</param>
 /// <param name="databricksClientWrapper">The databricks client wrapper; stored on DatabricksClientWrapper.</param>
 /// <param name="dataLakeClient">The data lake client; stored on DataLakeClient.</param>
 public Configuration(
     ISecretProvider secretProvider = null,
     DatabricksClientWrapper databricksClientWrapper = null,
     DataLakeClient dataLakeClient = null)
 {
     this.DataLakeClient          = dataLakeClient;
     this.DatabricksClientWrapper = databricksClientWrapper;
 }
        /// <summary>
        /// Processes the metrics correctness test: collects baseline and current metric values
        /// according to the comparison type, then produces one result per configured threshold.
        /// </summary>
        /// <param name="dataLakeClient">The data lake client.</param>
        /// <param name="testContent">Content of the test.</param>
        /// <param name="sparkClient">The spark client.</param>
        /// <param name="sparkClientSettings">The spark client settings.</param>
        /// <param name="cancellationToken">Token used to cancel the Spark notebook runs.</param>
        /// <returns>A list with one <see cref="MetricsTestResult"/> per entry in <c>testContent.Thresholds</c>.</returns>
        /// <exception cref="NotSupportedException">ComparisonType {testContent.ComparisonType} not supported for SparkWorker.</exception>
        private static List<MetricsTestResult> ProcessMetricsCorrectnessTest(
            DataLakeClient dataLakeClient,
            SparkMetricsTestContent testContent,
            SparkClient sparkClient,
            SparkClientSettings sparkClientSettings,
            CancellationToken cancellationToken)
        {
            MetricValues metricValues;

            switch (testContent.ComparisonType)
            {
            case ComparisonType.DayOverDay:
            case ComparisonType.WeekOverWeek:
            case ComparisonType.MonthOverMonth:
            case ComparisonType.YearOverYear:
                // Period-over-period: baseline comes from the previous stream path.
                metricValues = GetMetricValues(
                    dataLakeClient,
                    testContent,
                    sparkClient,
                    sparkClientSettings,
                    cancellationToken);
                break;

            case ComparisonType.VarianceToTarget:
                var metricValue = GetMetricValue(
                    testContent,
                    sparkClient,
                    sparkClientSettings,
                    testContent.GetCurrentStreamPath(),
                    dataLakeClient,
                    cancellationToken);

                // Swap in the target query before computing the baseline.
                // NOTE: this mutates testContent.NotebookParameters for the rest of the test run.
                testContent.NotebookParameters["cmdText"] = testContent.NotebookParameters["targetCmdText"];

                var targetValue = GetMetricValue(
                    testContent,
                    sparkClient,
                    sparkClientSettings,
                    // Fall back to the current stream when no dedicated target stream is configured.
                    testContent.TargetStreamPath != null ? testContent.GetCurrentTargetStreamPath() : testContent.GetCurrentStreamPath(),
                    dataLakeClient,
                    cancellationToken);

                metricValues = new MetricValues
                {
                    Baseline = targetValue,
                    Current  = metricValue
                };
                break;

            default:
                throw new NotSupportedException($"ComparisonType {testContent.ComparisonType} not supported for SparkWorker");
            }

            var results = new List<MetricsTestResult>();

            foreach (var threshold in testContent.Thresholds)
            {
                results.Add(new MetricsTestResult
                {
                    ComparisonType      = testContent.ComparisonType,
                    Date                = testContent.Date,
                    BaselineMetricValue = metricValues.Baseline[threshold.Name],
                    MetricValue         = metricValues.Current[threshold.Name],
                    LowerBoundThreshold = threshold.LowerBound,
                    UpperBoundThreshold = threshold.UpperBound,
                    PercentDiff         = MetricValues.ComputePercentDiff(metricValues, threshold.Name),
                    PreviousDate        = testContent.GetPreviousDate(),
                    TestName            = testRunName,
                    TestRunId           = testRunId,
                    MetricName          = threshold.Name
                });
            }

            return results;
        }
        /// <summary>
        /// Gets the metric values for one stream by running the test notebook on Spark and
        /// parsing the pandas-JSON result into a metric-name-to-value dictionary (first row only).
        /// </summary>
        /// <param name="testContent">Content of the test.</param>
        /// <param name="sparkClient">The spark client.</param>
        /// <param name="sparkClientSettings">The spark client settings.</param>
        /// <param name="stream">The stream path to evaluate.</param>
        /// <param name="dataLakeClient">The data lake client used to verify the stream exists.</param>
        /// <param name="cancellationToken">Token used to cancel the notebook run.</param>
        /// <returns>Dictionary keyed by metric/column name containing the first row's value.</returns>
        /// <exception cref="FileNotFoundException">Stream does not exist : {stream}</exception>
        /// <exception cref="InvalidOperationException">Result does not contain any rows.</exception>
        /// <exception cref="Exception">The Spark run itself failed.</exception>
        private static IDictionary <string, double> GetMetricValue(
            SparkMetricsTestContent testContent,
            SparkClient sparkClient,
            SparkClientSettings sparkClientSettings,
            string stream,
            DataLakeClient dataLakeClient,
            CancellationToken cancellationToken)
        {
            // Fail fast before paying for a Spark run.
            if (!dataLakeClient.CheckExists(testContent.GetDatalakeStore(), stream))
            {
                throw new FileNotFoundException($"Stream does not exist : {stream}");
            }

            var sparkRequest = new SparkClientRequest
            {
                NodeType           = sparkClientSettings.NodeType,
                NumWorkersMin      = sparkClientSettings.NumWorkersMin,
                NumWorkersMax      = sparkClientSettings.NumWorkersMax,
                CostPerNode        = GetCostPerNode(sparkClientSettings.NodeTypes, sparkClientSettings.NodeType),
                Libraries          = sparkClientSettings.Libraries,
                NotebookPath       = testContent.NotebookPath,
                NotebookParameters = testContent.NotebookParameters ?? new Dictionary <string, string>(),
                TestRunId          = testRunId,
                TimeoutSeconds     = sparkClientSettings.TimeoutSeconds
            };

            Console.WriteLine($"Running notebook={testContent.NotebookPath} for DataLakeStore={testContent.GetDatalakeStore()}, Path={stream}");
            var mountPoint = GetMountPoint(testContent.GetDatalakeStore(), stream);

            Console.WriteLine($"Running notebook={testContent.NotebookPath} for mountPoint={mountPoint}");

            // The parquet-conversion branch that used to derive streamPath is intentionally
            // disabled; the notebook always reads directly from the mount point now.
            string streamPath = mountPoint;

            sparkRequest.NotebookParameters["streamPath"] = streamPath;

            Console.WriteLine($"Notebook parameters : {string.Join(", ", sparkRequest.NotebookParameters.Select(t => t.Key + "=" + t.Value))}");

            // Log request to OMS

            var response = sparkClient.RunNotebook(sparkRequest, cancellationToken);

            response.TestRunId = testRunId;

            if (response.IsRunSuccess())
            {
                // RunOutput is a pandas DataFrame serialized as {column: {rowLabel: value}}.
                // For format reference see:
                // https://pandas.pydata.org/pandas-docs/version/0.24.2/reference/api/pandas.DataFrame.to_json.html
                var resultDataFrame  = JsonConvert.DeserializeObject <Dictionary <string, Dictionary <string, double> > >(response.RunOutput);
                var resultDictionary = new Dictionary <string, double>();
                foreach (var pair in resultDataFrame)
                {
                    // pair.Key is the column name; pair.Value maps row label -> value.
                    if (pair.Value == null || pair.Value.Count == 0)
                    {
                        throw new InvalidOperationException("Result does not contain any rows");
                    }

                    // We take the first row only
                    resultDictionary.Add(pair.Key, pair.Value.First().Value);
                }

                return resultDictionary;
            }

            Console.WriteLine("Error getting metric.");
            Console.WriteLine(JObject.Parse(JsonConvert.SerializeObject(response)));

            // NOTE(review): throwing the base Exception type is an anti-pattern; kept as-is in
            // case callers filter on it — consider InvalidOperationException in a follow-up.
            throw new Exception($"Error getting metric. TestRun = {testRunId}, Spark job {response?.Run?.RunId} failed");
        }
// ----- Example #5 -----
        /// <summary>
        /// Timer-triggered entry point that flushes the default Redis list and every named
        /// service's Redis list to the data lake in parallel.
        /// </summary>
        /// <param name="myTimer">Timer trigger info (schedule %RedisDataPumpSchedule%, also runs on startup).</param>
        /// <param name="log">The function logger.</param>
        public static async Task Run(
            [TimerTrigger("%RedisDataPumpSchedule%", RunOnStartup = true)] TimerInfo myTimer,
            ILogger log)
        {
            // Capture one start timestamp so every flush shares the same cutoff.
            long startTicks = DateTime.UtcNow.Ticks;

            using (log.BeginScope(logProperties))
                // This DataLake client is used for writing exceptions that occur inside RedisDataPump. Do NOT remove it.
                using (DataLakeClient dataLake = new DataLakeClient(log))
                {
                    try
                    {
                        // Per-flush timeout in milliseconds (kept just under a one-minute budget).
                        const int FlushTimeoutMs = 55000;

                        // Every named service whose Redis list gets its own DataLakeClient.
                        Res.ServiceName[] namedServices =
                        {
                            Res.ServiceName.AccountsPayableInvoiceInbound,
                            Res.ServiceName.AccountsPayableInvoiceOutbound,
                            Res.ServiceName.AccountsPayablePayInbound,
                            Res.ServiceName.AdvanceShippingNoticeInbound,
                            Res.ServiceName.Adyen,
                            Res.ServiceName.Coupon,
                            Res.ServiceName.CouponDeliveryOutbound,
                            Res.ServiceName.CustomerInbound,
                            Res.ServiceName.CustomerOutbound,
                            Res.ServiceName.DropshipOrderInbound,
                            Res.ServiceName.EdiPurchaseOrder,
                            Res.ServiceName.EmployeeInbound,
                            Res.ServiceName.FraudOutbound,
                            Res.ServiceName.GeneralLedgerOutbound,
                            Res.ServiceName.GenericApi,
                            Res.ServiceName.GenericInbound,
                            Res.ServiceName.GenericOutbound,
                            Res.ServiceName.Invalid,
                            Res.ServiceName.InventoryInbound,
                            Res.ServiceName.InventoryOutbound,
                            Res.ServiceName.Loyalty,
                            Res.ServiceName.LoyaltyInbound,
                            Res.ServiceName.LoyaltyOutbound,
                            Res.ServiceName.MapleLake,
                            Res.ServiceName.PricingOutbound,
                            Res.ServiceName.ProductInbound,
                            Res.ServiceName.ProductOutbound,
                            Res.ServiceName.PurchaseAgreementOutbound,
                            Res.ServiceName.PurchaseOrderConfirmInbound,
                            Res.ServiceName.PurchaseOrderInbound,
                            Res.ServiceName.SalesOrderConfirmOutbound,
                            Res.ServiceName.SalesOrderDropshipOutbound,
                            Res.ServiceName.SalesOrderInbound,
                            Res.ServiceName.SalesOrderOutbound,
                            Res.ServiceName.SalesOrderReturnInbound,
                            Res.ServiceName.SalesOrderReturnOutbound,
                            Res.ServiceName.SalesOrderSurveyOutbound,
                            Res.ServiceName.SapReturnOrderInbound,
                            Res.ServiceName.SapSalesOrderUpdateInbound,
                            Res.ServiceName.SupportImport,
                            Res.ServiceName.SvsGiftCardOutbound,
                            Res.ServiceName.TaxwareOutbound,
                            Res.ServiceName.TradeAgreementInbound_Map,
                            Res.ServiceName.TradeAgreementInbound_Purch,
                            Res.ServiceName.TradeAgreementOutbound,
                        };

                        var tasks = new List <Task>(namedServices.Length + 1);

                        // Pump and dump Redis lists (default: res-datalake)
                        tasks.Add(new DataLakeClient(log).FlushRedis(startTicks, FlushTimeoutMs));

                        // Named
                        foreach (var service in namedServices)
                        {
                            tasks.Add(new DataLakeClient(log, Extension.ToString(service)).FlushRedis(startTicks, FlushTimeoutMs));
                        }

                        // Await instead of Task.WaitAll: blocking inside an async function pins a
                        // thread-pool thread for the whole flush. Note WhenAll surfaces the first
                        // faulting task's exception directly (not wrapped in AggregateException),
                        // which gives the recovery message below a more useful root cause.
                        await Task.WhenAll(tasks);
                    }
                    catch (Exception ex)
                    {
                        await RecoveryMessage.SendAsync(new RecoveryMessage()
                        {
                            ServiceName   = ServiceName.DataLake,
                            FamilyName    = Extension.ToString(FamilyName.DataLake),
                            FunctionName  = nameof(RedisDataPump),
                            MethodName    = nameof(Run),
                            ResourceGroup = Extension.ToString(Res.ServiceName.DataLake),
                            Exception     = ex,
                        }, dataLake, log);
                    }
                }
        }