/// <summary>
/// Gets the current and baseline metric values for the test.
/// </summary>
/// <param name="dataLakeClient">The data lake client.</param>
/// <param name="testContent">Content of the test.</param>
/// <param name="sparkClient">The spark client.</param>
/// <param name="sparkClientSettings">The spark client settings.</param>
/// <param name="cancellationToken">The cancellation token.</param>
/// <returns>The current and baseline <see cref="MetricValues"/>.</returns>
private static MetricValues GetMetricValues(
    DataLakeClient dataLakeClient,
    SparkMetricsTestContent testContent,
    SparkClient sparkClient,
    SparkClientSettings sparkClientSettings,
    CancellationToken cancellationToken)
{
    string currentPath = testContent.GetCurrentStreamPath();
    string previousPath = testContent.GetPreviousStreamPath();

    var current = GetMetricValue(
        testContent,
        sparkClient,
        sparkClientSettings,
        currentPath,
        dataLakeClient,
        cancellationToken);

    var previous = GetMetricValue(
        testContent,
        sparkClient,
        sparkClientSettings,
        previousPath,
        dataLakeClient,
        cancellationToken);

    return new MetricValues
    {
        Baseline = previous,
        Current = current
    };
}
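// A minimal sketch of the MetricValues shape implied by the usage above and
// below: Baseline/Current are metric-name -> value maps, and ComputePercentDiff
// is assumed to be a plain percent change over the baseline. The real class
// lives elsewhere in the repo; this is illustrative, not the actual source.
//
//   public class MetricValues
//   {
//       public IDictionary<string, double> Baseline { get; set; }
//       public IDictionary<string, double> Current { get; set; }
//
//       public static double ComputePercentDiff(MetricValues values, string name)
//       {
//           double baseline = values.Baseline[name];
//           double current = values.Current[name];
//
//           // Guard against divide-by-zero when the baseline metric is 0.
//           return baseline == 0 ? 0 : (current - baseline) / baseline * 100;
//       }
//   }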
/// <summary>
/// Initializes a new instance of the <see cref="Configuration"/> class.
/// </summary>
/// <param name="secretProvider">The secret provider.</param>
/// <param name="databricksClientWrapper">The Databricks client wrapper.</param>
/// <param name="dataLakeClient">The data lake client.</param>
public Configuration(
    ISecretProvider secretProvider = null,
    DatabricksClientWrapper databricksClientWrapper = null,
    DataLakeClient dataLakeClient = null)
{
    // Assumes a SecretProvider property exists; without this assignment the
    // injected secretProvider would be silently dropped.
    this.SecretProvider = secretProvider;
    this.DataLakeClient = dataLakeClient;
    this.DatabricksClientWrapper = databricksClientWrapper;
}
/// <summary>
/// Processes the metrics correctness test.
/// </summary>
/// <param name="dataLakeClient">The data lake client.</param>
/// <param name="testContent">Content of the test.</param>
/// <param name="sparkClient">The spark client.</param>
/// <param name="sparkClientSettings">The spark client settings.</param>
/// <param name="cancellationToken">The cancellation token.</param>
/// <returns>One <see cref="MetricsTestResult"/> per configured threshold.</returns>
/// <exception cref="NotSupportedException">Thrown when the ComparisonType is not supported.</exception>
private static List<MetricsTestResult> ProcessMetricsCorrectnessTest(
    DataLakeClient dataLakeClient,
    SparkMetricsTestContent testContent,
    SparkClient sparkClient,
    SparkClientSettings sparkClientSettings,
    CancellationToken cancellationToken)
{
    MetricValues metricValues = null;

    switch (testContent.ComparisonType)
    {
        case ComparisonType.DayOverDay:
        case ComparisonType.WeekOverWeek:
        case ComparisonType.MonthOverMonth:
        case ComparisonType.YearOverYear:
            metricValues = GetMetricValues(
                dataLakeClient,
                testContent,
                sparkClient,
                sparkClientSettings,
                cancellationToken);
            break;

        case ComparisonType.VarianceToTarget:
            var metricValue = GetMetricValue(
                testContent,
                sparkClient,
                sparkClientSettings,
                testContent.GetCurrentStreamPath(),
                dataLakeClient,
                cancellationToken);

            // Re-point the notebook at the target query before fetching the
            // baseline. Note that this mutates testContent.NotebookParameters
            // in place.
            testContent.NotebookParameters["cmdText"] = testContent.NotebookParameters["targetCmdText"];

            var targetValue = GetMetricValue(
                testContent,
                sparkClient,
                sparkClientSettings,
                testContent.TargetStreamPath != null
                    ? testContent.GetCurrentTargetStreamPath()
                    : testContent.GetCurrentStreamPath(),
                dataLakeClient,
                cancellationToken);

            metricValues = new MetricValues
            {
                Baseline = targetValue,
                Current = metricValue
            };
            break;

        default:
            throw new NotSupportedException($"ComparisonType {testContent.ComparisonType} not supported for SparkWorker");
    }

    var results = new List<MetricsTestResult>();
    foreach (var threshold in testContent.Thresholds)
    {
        results.Add(new MetricsTestResult
        {
            ComparisonType = testContent.ComparisonType,
            Date = testContent.Date,
            BaselineMetricValue = metricValues.Baseline[threshold.Name],
            MetricValue = metricValues.Current[threshold.Name],
            LowerBoundThreshold = threshold.LowerBound,
            UpperBoundThreshold = threshold.UpperBound,
            PercentDiff = MetricValues.ComputePercentDiff(metricValues, threshold.Name),
            PreviousDate = testContent.GetPreviousDate(),
            TestName = testRunName,
            TestRunId = testRunId,
            MetricName = threshold.Name
        });
    }

    return results;
}
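// The threshold bounds are not evaluated in this worker; results are returned
// for a downstream consumer. For illustration only, a hypothetical check
// (IsWithinBounds is not part of the repo) might look like:
private static bool IsWithinBounds(MetricsTestResult result)
{
    // A metric passes when its percent diff falls inside [lower, upper].
    return result.PercentDiff >= result.LowerBoundThreshold
        && result.PercentDiff <= result.UpperBoundThreshold;
}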
/// <summary>
/// Runs the metrics notebook against a stream and returns the metric values it produces.
/// </summary>
/// <param name="testContent">Content of the test.</param>
/// <param name="sparkClient">The spark client.</param>
/// <param name="sparkClientSettings">The spark client settings.</param>
/// <param name="stream">The stream path.</param>
/// <param name="dataLakeClient">The data lake client.</param>
/// <param name="cancellationToken">The cancellation token.</param>
/// <returns>A dictionary of metric name to metric value.</returns>
/// <exception cref="FileNotFoundException">Thrown when the stream does not exist.</exception>
private static IDictionary<string, double> GetMetricValue(
    SparkMetricsTestContent testContent,
    SparkClient sparkClient,
    SparkClientSettings sparkClientSettings,
    string stream,
    DataLakeClient dataLakeClient,
    CancellationToken cancellationToken)
{
    if (!dataLakeClient.CheckExists(testContent.GetDatalakeStore(), stream))
    {
        throw new FileNotFoundException($"Stream does not exist : {stream}");
    }

    var sparkRequest = new SparkClientRequest
    {
        NodeType = sparkClientSettings.NodeType,
        NumWorkersMin = sparkClientSettings.NumWorkersMin,
        NumWorkersMax = sparkClientSettings.NumWorkersMax,
        CostPerNode = GetCostPerNode(sparkClientSettings.NodeTypes, sparkClientSettings.NodeType),
        Libraries = sparkClientSettings.Libraries,
        NotebookPath = testContent.NotebookPath,
        NotebookParameters = testContent.NotebookParameters ?? new Dictionary<string, string>(),
        TestRunId = testRunId,
        TimeoutSeconds = sparkClientSettings.TimeoutSeconds
    };

    Console.WriteLine($"Running notebook={testContent.NotebookPath} for DataLakeStore={testContent.GetDatalakeStore()}, Path={stream}");

    var mountPoint = GetMountPoint(testContent.GetDatalakeStore(), stream);
    Console.WriteLine($"Running notebook={testContent.NotebookPath} for mountPoint={mountPoint}");

    string streamPath = mountPoint;

    // ConvertToParquet handling is intentionally disabled:
    //if (testContent.ConvertToParquet)
    //{
    //    var parquetFile = messageTag.MountPointToParquetFile[mountPoint];
    //    Console.WriteLine($"Running notebook={testContent.NotebookPath} using parquetFile={parquetFile}");
    //    streamPath = parquetFile;
    //}

    sparkRequest.NotebookParameters["streamPath"] = streamPath;
    Console.WriteLine($"Notebook parameters : {string.Join(", ", sparkRequest.NotebookParameters.Select(t => t.Key + "=" + t.Value))}");

    // Log request to OMS
    var response = sparkClient.RunNotebook(sparkRequest, cancellationToken);
    response.TestRunId = testRunId;

    if (!response.IsRunSuccess())
    {
        Console.WriteLine("Error getting metric.");
        Console.WriteLine(JObject.Parse(JsonConvert.SerializeObject(response)));
        throw new Exception($"Error getting metric. TestRun = {testRunId}, Spark job {response?.Run?.RunId} failed");
    }

    // For format reference see:
    // https://pandas.pydata.org/pandas-docs/version/0.24.2/reference/api/pandas.DataFrame.to_json.html
    var resultDataFrame = JsonConvert.DeserializeObject<Dictionary<string, Dictionary<string, double>>>(response.RunOutput);
    var resultDictionary = new Dictionary<string, double>();

    foreach (var pair in resultDataFrame)
    {
        // pair.Key is the column (metric) name; pair.Value maps row index to value.
        if (pair.Value == null || pair.Value.Count == 0)
        {
            throw new InvalidOperationException("Result does not contain any rows");
        }

        // We take the first row only.
        resultDictionary.Add(pair.Key, pair.Value.First().Value);
    }

    return resultDictionary;
}
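// For illustration, RunOutput is expected in pandas' default to_json orient
// ("columns"): a map of column name -> (row index -> value). The metric names
// and values below are hypothetical:
//
//   {"rowCount": {"0": 1523.0}, "revenue": {"0": 98214.55}}
//
// GetMetricValue keeps the first row of each column, yielding
// { "rowCount" -> 1523.0, "revenue" -> 98214.55 }.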
public static async Task Run(
    [TimerTrigger("%RedisDataPumpSchedule%", RunOnStartup = true)] TimerInfo myTimer,
    ILogger log)
{
    // Capture one start time shared by every flush in this run.
    long startTicks = DateTime.UtcNow.Ticks;

    using (log.BeginScope(logProperties))
    // This DataLake client is used for writing exceptions that occur inside RedisDataPump. Do NOT remove it.
    using (DataLakeClient dataLake = new DataLakeClient(log))
    {
        try
        {
            List<Task> tasks = new List<Task>();

            // Pump and dump Redis lists (default: res-datalake), giving each flush a 55-second budget.
            tasks.Add(new DataLakeClient(log).FlushRedis(startTicks, 55000));

            // Named
            tasks.Add(new DataLakeClient(log, Extension.ToString(Res.ServiceName.AccountsPayableInvoiceInbound)).FlushRedis(startTicks, 55000));
            tasks.Add(new DataLakeClient(log, Extension.ToString(Res.ServiceName.AccountsPayableInvoiceOutbound)).FlushRedis(startTicks, 55000));
            tasks.Add(new DataLakeClient(log, Extension.ToString(Res.ServiceName.AccountsPayablePayInbound)).FlushRedis(startTicks, 55000));
            tasks.Add(new DataLakeClient(log, Extension.ToString(Res.ServiceName.AdvanceShippingNoticeInbound)).FlushRedis(startTicks, 55000));
            tasks.Add(new DataLakeClient(log, Extension.ToString(Res.ServiceName.Adyen)).FlushRedis(startTicks, 55000));
            tasks.Add(new DataLakeClient(log, Extension.ToString(Res.ServiceName.Coupon)).FlushRedis(startTicks, 55000));
            tasks.Add(new DataLakeClient(log, Extension.ToString(Res.ServiceName.CouponDeliveryOutbound)).FlushRedis(startTicks, 55000));
            tasks.Add(new DataLakeClient(log, Extension.ToString(Res.ServiceName.CustomerInbound)).FlushRedis(startTicks, 55000));
            tasks.Add(new DataLakeClient(log, Extension.ToString(Res.ServiceName.CustomerOutbound)).FlushRedis(startTicks, 55000));
            tasks.Add(new DataLakeClient(log, Extension.ToString(Res.ServiceName.DropshipOrderInbound)).FlushRedis(startTicks, 55000));
            tasks.Add(new DataLakeClient(log, Extension.ToString(Res.ServiceName.EdiPurchaseOrder)).FlushRedis(startTicks, 55000));
            tasks.Add(new DataLakeClient(log, Extension.ToString(Res.ServiceName.EmployeeInbound)).FlushRedis(startTicks, 55000));
            tasks.Add(new DataLakeClient(log, Extension.ToString(Res.ServiceName.FraudOutbound)).FlushRedis(startTicks, 55000));
            tasks.Add(new DataLakeClient(log, Extension.ToString(Res.ServiceName.GeneralLedgerOutbound)).FlushRedis(startTicks, 55000));
            tasks.Add(new DataLakeClient(log, Extension.ToString(Res.ServiceName.GenericApi)).FlushRedis(startTicks, 55000));
            tasks.Add(new DataLakeClient(log, Extension.ToString(Res.ServiceName.GenericInbound)).FlushRedis(startTicks, 55000));
            tasks.Add(new DataLakeClient(log, Extension.ToString(Res.ServiceName.GenericOutbound)).FlushRedis(startTicks, 55000));
            tasks.Add(new DataLakeClient(log, Extension.ToString(Res.ServiceName.Invalid)).FlushRedis(startTicks, 55000));
            tasks.Add(new DataLakeClient(log, Extension.ToString(Res.ServiceName.InventoryInbound)).FlushRedis(startTicks, 55000));
            tasks.Add(new DataLakeClient(log, Extension.ToString(Res.ServiceName.InventoryOutbound)).FlushRedis(startTicks, 55000));
            tasks.Add(new DataLakeClient(log, Extension.ToString(Res.ServiceName.Loyalty)).FlushRedis(startTicks, 55000));
            tasks.Add(new DataLakeClient(log, Extension.ToString(Res.ServiceName.LoyaltyInbound)).FlushRedis(startTicks, 55000));
            tasks.Add(new DataLakeClient(log, Extension.ToString(Res.ServiceName.LoyaltyOutbound)).FlushRedis(startTicks, 55000));
            tasks.Add(new DataLakeClient(log, Extension.ToString(Res.ServiceName.MapleLake)).FlushRedis(startTicks, 55000));
            tasks.Add(new DataLakeClient(log, Extension.ToString(Res.ServiceName.PricingOutbound)).FlushRedis(startTicks, 55000));
            tasks.Add(new DataLakeClient(log, Extension.ToString(Res.ServiceName.ProductInbound)).FlushRedis(startTicks, 55000));
            tasks.Add(new DataLakeClient(log, Extension.ToString(Res.ServiceName.ProductOutbound)).FlushRedis(startTicks, 55000));
            tasks.Add(new DataLakeClient(log, Extension.ToString(Res.ServiceName.PurchaseAgreementOutbound)).FlushRedis(startTicks, 55000));
            tasks.Add(new DataLakeClient(log, Extension.ToString(Res.ServiceName.PurchaseOrderConfirmInbound)).FlushRedis(startTicks, 55000));
            tasks.Add(new DataLakeClient(log, Extension.ToString(Res.ServiceName.PurchaseOrderInbound)).FlushRedis(startTicks, 55000));
            tasks.Add(new DataLakeClient(log, Extension.ToString(Res.ServiceName.SalesOrderConfirmOutbound)).FlushRedis(startTicks, 55000));
            tasks.Add(new DataLakeClient(log, Extension.ToString(Res.ServiceName.SalesOrderDropshipOutbound)).FlushRedis(startTicks, 55000));
            tasks.Add(new DataLakeClient(log, Extension.ToString(Res.ServiceName.SalesOrderInbound)).FlushRedis(startTicks, 55000));
            tasks.Add(new DataLakeClient(log, Extension.ToString(Res.ServiceName.SalesOrderOutbound)).FlushRedis(startTicks, 55000));
            tasks.Add(new DataLakeClient(log, Extension.ToString(Res.ServiceName.SalesOrderReturnInbound)).FlushRedis(startTicks, 55000));
            tasks.Add(new DataLakeClient(log, Extension.ToString(Res.ServiceName.SalesOrderReturnOutbound)).FlushRedis(startTicks, 55000));
            tasks.Add(new DataLakeClient(log, Extension.ToString(Res.ServiceName.SalesOrderSurveyOutbound)).FlushRedis(startTicks, 55000));
            tasks.Add(new DataLakeClient(log, Extension.ToString(Res.ServiceName.SapReturnOrderInbound)).FlushRedis(startTicks, 55000));
            tasks.Add(new DataLakeClient(log, Extension.ToString(Res.ServiceName.SapSalesOrderUpdateInbound)).FlushRedis(startTicks, 55000));
            tasks.Add(new DataLakeClient(log, Extension.ToString(Res.ServiceName.SupportImport)).FlushRedis(startTicks, 55000));
            tasks.Add(new DataLakeClient(log, Extension.ToString(Res.ServiceName.SvsGiftCardOutbound)).FlushRedis(startTicks, 55000));
            tasks.Add(new DataLakeClient(log, Extension.ToString(Res.ServiceName.TaxwareOutbound)).FlushRedis(startTicks, 55000));
            tasks.Add(new DataLakeClient(log, Extension.ToString(Res.ServiceName.TradeAgreementInbound_Map)).FlushRedis(startTicks, 55000));
            tasks.Add(new DataLakeClient(log, Extension.ToString(Res.ServiceName.TradeAgreementInbound_Purch)).FlushRedis(startTicks, 55000));
            tasks.Add(new DataLakeClient(log, Extension.ToString(Res.ServiceName.TradeAgreementOutbound)).FlushRedis(startTicks, 55000));

            // Wait for all flushes without blocking the worker thread
            // (Task.WaitAll would block inside an async method).
            await Task.WhenAll(tasks);
        }
        catch (Exception ex)
        {
            await RecoveryMessage.SendAsync(
                new RecoveryMessage()
                {
                    ServiceName = ServiceName.DataLake,
                    FamilyName = Extension.ToString(FamilyName.DataLake),
                    FunctionName = nameof(RedisDataPump),
                    MethodName = nameof(Run),
                    ResourceGroup = Extension.ToString(Res.ServiceName.DataLake),
                    Exception = ex,
                },
                dataLake,
                log);
        }
    }
}
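// The long run of near-identical registrations above could be collapsed into a
// loop over the service names. A sketch of an equivalent shape, assuming every
// listed Res.ServiceName value should share the same 55-second flush budget:
//
//   var serviceNames = new[]
//   {
//       Res.ServiceName.AccountsPayableInvoiceInbound,
//       Res.ServiceName.AccountsPayableInvoiceOutbound,
//       // ... remaining names as listed above ...
//       Res.ServiceName.TradeAgreementOutbound,
//   };
//
//   foreach (var name in serviceNames)
//   {
//       tasks.Add(new DataLakeClient(log, Extension.ToString(name)).FlushRedis(startTicks, 55000));
//   }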