/// <summary>
/// Validates the data cells under every header that has a matching quality check column rule.
/// For rules whose column type is named "Numeric", every cell must be numeric and, when the rule
/// defines a range, must also pass the range check. At most one error is recorded per header.
/// </summary>
/// <param name="qualityCheck">Quality check whose column rules are matched against the headers.</param>
/// <param name="qualityCheckTypes">Column types used to resolve the type of each matched rule.</param>
/// <param name="headers">Header cells of the sheet.</param>
/// <param name="columns">Data cells of the sheet.</param>
/// <param name="qualityCheckResult">Result whose error list receives the issues found.</param>
private void GetColumnIssues(DM.QualityCheck qualityCheck, IEnumerable<QualityCheckColumnType> qualityCheckTypes, IEnumerable<SheetCell> headers, IEnumerable<SheetCell> columns, QualityCheckResult qualityCheckResult)
{
    if (columns == null || !columns.Any())
    {
        qualityCheckResult.Errors.Add("No data except Headers");
        return;
    }

    // Without headers there is nothing to match rules against.
    if (headers == null)
    {
        return;
    }

    foreach (var headerColumn in headers)
    {
        // Find the rule configured for this header (trimmed, case-insensitive match).
        QualityCheckColumnRule qualityCheckItem = qualityCheck.QualityCheckColumnRules.FirstOrDefault(
            rule => rule.HeaderName.Trim().Equals(headerColumn.Value.Trim(), StringComparison.OrdinalIgnoreCase));
        if (qualityCheckItem == null)
        {
            continue;
        }

        // NOTE(review): the returned index is discarded; the call is kept only in case
        // GetColumnIndex validates its input or has side effects - confirm and remove if not.
        GetColumnIndex(headerColumn.Value);

        // Only numeric columns are validated. A rule whose type id cannot be resolved is
        // skipped instead of failing with a NullReferenceException.
        QualityCheckColumnType columnType = qualityCheckTypes.FirstOrDefault(
            q => q.QualityCheckColumnTypeId == qualityCheckItem.QualityCheckColumnTypeId);
        if (columnType == null || columnType.Name != "Numeric")
        {
            continue;
        }

        // Materialize the cells of this column once; they may be scanned twice below.
        List<SheetCell> columnCells = columns
            .Where(c => c.ColumnName.Trim().Equals(headerColumn.ColumnName.Trim(), StringComparison.OrdinalIgnoreCase))
            .ToList();

        // Type validation: a single non-numeric cell fails the whole column.
        if (columnCells.Any(cell => !cell.Value.IsNumeric()))
        {
            qualityCheckResult.Errors.Add(string.Format(CultureInfo.CurrentCulture, "Data under header '{0}' should be of Numeric type", headerColumn.Value));
            continue;
        }

        // Range validation only applies when the rule defines a range.
        if (string.IsNullOrEmpty(qualityCheckItem.Range))
        {
            continue;
        }

        string[] rangeValues = qualityCheckItem.Range.Split(new string[] { Utilities.Constants.RangeSeparator }, StringSplitOptions.None);
        foreach (var columnData in columnCells)
        {
            string message = CheckForRange(columnData.Value, rangeValues, headerColumn.Value);
            if (!string.IsNullOrEmpty(message))
            {
                // Report only the first out-of-range value for this header.
                qualityCheckResult.Errors.Add(message);
                break;
            }
        }
    }
}
/// <summary>
/// Runs the header and column quality checks against each requested sheet of an Excel workbook.
/// </summary>
/// <param name="stream">Stream containing the Excel workbook; it is opened read-only.</param>
/// <param name="sheetIds">Sheet ids to check, in the form accepted by GetSheetIds.</param>
/// <param name="qualityCheck">Quality check whose column rules are validated.</param>
/// <param name="qualityCheckTypes">Column types used to resolve the type of each rule.</param>
/// <returns>
/// One QualityCheckResult per requested sheet id. A sheet id that cannot be found in the
/// workbook produces a result carrying a single "missing sheet" error.
/// </returns>
private IEnumerable<QualityCheckResult> GetQualityCheckRulesForExcel(Stream stream, string sheetIds, DM.QualityCheck qualityCheck, IEnumerable<QualityCheckColumnType> qualityCheckTypes)
{
    List<QualityCheckResult> qualityCheckResults = new List<QualityCheckResult>();

    using (SpreadsheetDocument excelDocument = SpreadsheetDocument.Open(stream, false))
    {
        // Resolve the workbook's sheets once, guarding against a package without a workbook
        // part before it is dereferenced.
        var workbookPart = excelDocument.WorkbookPart;
        List<Sheet> sheets = (workbookPart != null && workbookPart.Workbook != null)
            ? workbookPart.Workbook.Descendants<Sheet>().ToList()
            : new List<Sheet>();

        foreach (var sheetId in GetSheetIds(sheetIds))
        {
            var currentSheet = sheets.FirstOrDefault(s => s.Id == sheetId);
            if (currentSheet == null)
            {
                // Surface the problem through the normal error channel instead of failing
                // with a NullReferenceException; the id doubles as the name because no
                // sheet metadata is available.
                QualityCheckResult missingSheetResult = new QualityCheckResult() { SheetId = sheetId, SheetName = sheetId };
                missingSheetResult.Errors.Add(string.Format(CultureInfo.CurrentCulture, "Sheet '{0}' is missing", sheetId));
                qualityCheckResults.Add(missingSheetResult);
                continue;
            }

            IEnumerable<SheetCell> headersAndColumns = ExcelFileHelper.GetHeadersAndColumns(excelDocument, sheetId);

            // Row 1 holds the headers; every other row is data.
            var headers = headersAndColumns.Where(c => c.RowIndex == 1).ToList();
            var columns = headersAndColumns.Where(c => c.RowIndex != 1).ToList();

            QualityCheckResult qualityCheckResult = new QualityCheckResult() { SheetId = currentSheet.Id, SheetName = currentSheet.Name };
            qualityCheckResults.Add(qualityCheckResult);

            GetHeaderIssues(qualityCheck, headers, qualityCheckResult);
            GetColumnIssues(qualityCheck, qualityCheckTypes, headers, columns, qualityCheckResult);
        }
    }

    return qualityCheckResults;
}
/// <summary>
/// Checks the header row of a sheet against the quality check column rules and appends
/// every problem found to the result's error list.
/// </summary>
/// <param name="qualityCheck">Quality check providing the column rules and the order flag.</param>
/// <param name="headers">Header cells of the sheet; may be null or empty.</param>
/// <param name="qualityCheckResult">Result whose error list receives the issues found.</param>
private void GetHeaderIssues(DM.QualityCheck qualityCheck, IEnumerable<SheetCell> headers, QualityCheckResult qualityCheckResult)
{
    // No header row at all: every configured header is reported as missing.
    if (headers == null || !headers.Any())
    {
        foreach (var rule in qualityCheck.QualityCheckColumnRules)
        {
            qualityCheckResult.Errors.Add(string.Format(CultureInfo.CurrentCulture, "Header '{0}' is missing", rule.HeaderName));
        }

        return;
    }

    // The header row has to begin in cell A1; otherwise no further check is run.
    if (headers.First().ColumnLocation != "A1")
    {
        qualityCheckResult.Errors.Add("Invalid file, headers should start from A1 location");
        return;
    }

    // Report each required rule that has no matching header (trimmed, case-insensitive).
    bool requiredHeaderMissing = false;
    foreach (var rule in qualityCheck.QualityCheckColumnRules)
    {
        bool isPresent = headers.Any(h => h.Value.Trim().Equals(rule.HeaderName.Trim(), StringComparison.OrdinalIgnoreCase));
        if (!isPresent && rule.IsRequired == true)
        {
            requiredHeaderMissing = true;
            qualityCheckResult.Errors.Add(string.Format(CultureInfo.CurrentCulture, "Header '{0}' is missing", rule.HeaderName));
        }
    }

    // The order check runs only when nothing is missing and order enforcement is not switched off.
    if (requiredHeaderMissing || qualityCheck.EnforceOrder == false)
    {
        return;
    }

    // Walk the rules in their configured order and collect where each present header sits.
    List<int> locationIndices = new List<int>();
    foreach (var rule in qualityCheck.QualityCheckColumnRules.OrderBy(r => r.Order))
    {
        SheetCell match = headers.FirstOrDefault(h => h.Value.Trim().Equals(rule.HeaderName.Trim(), StringComparison.OrdinalIgnoreCase));
        if (match == null)
        {
            continue;
        }

        locationIndices.Add(GetColumnLocationIndex(match.Value, headers));
    }

    // Header positions must be ascending when read in rule order.
    if (!Utilities.Helper.CheckIntListAscOrder(locationIndices))
    {
        qualityCheckResult.Errors.Add("Headers are not in order");
    }
}
/// <summary>
/// Runs the quality check against a comma-delimited data file read from blob storage.
/// The first line is validated as the header row; each following line is validated as a
/// data record, recording at most one error per column.
/// </summary>
/// <param name="fileDetail">File whose blob content is validated; checked with Check.IsNotNull.</param>
/// <param name="qualityCheck">Quality check providing the column rules; checked with Check.IsNotNull.</param>
/// <param name="qualityCheckTypes">Column types used to build the validation index; checked with Check.IsNotNull.</param>
/// <param name="sheetIds">Not read by this implementation.</param>
/// <returns>
/// A collection containing a single result, identified by the file name without extension,
/// holding the header errors followed by the data errors.
/// </returns>
public async Task<IEnumerable<Utilities.Model.QualityCheckResult>> GetQualityCheckIssues(DomainModel.File fileDetail, DomainModel.QualityCheck qualityCheck, IEnumerable<DomainModel.QualityCheckColumnType> qualityCheckTypes, string sheetIds)
{
    Check.IsNotNull(fileDetail, "fileDetail");
    Check.IsNotNull(qualityCheck, "qualityCheck");
    Check.IsNotNull(qualityCheckTypes, "qualityCheckTypes");

    List<QualityCheckResult> qualityCheckResults = new List<QualityCheckResult>();

    // Task.Run always targets the thread pool, whereas Task.Factory.StartNew would schedule
    // on whatever TaskScheduler happens to be current at the call site.
    await Task.Run(() =>
    {
        using (Stream dataStream = base.BlobDataRepository.GetBlob(fileDetail.BlobId))
        using (StreamReader reader = new StreamReader(new MemoryStream()))
        {
            // Copy the blob into the reader's in-memory stream, then rewind it for parsing.
            dataStream.Seek(0, SeekOrigin.Begin);
            dataStream.CopyToStream(reader.BaseStream);
            reader.BaseStream.Seek(0, SeekOrigin.Begin);

            string fileName = Path.GetFileNameWithoutExtension(fileDetail.Name);
            QualityCheckResult errorResult = new QualityCheckResult() { SheetId = fileName, SheetName = fileName };

            bool isHeaderRow = true;

            // Column position -> first error message found in that column.
            Dictionary<int, string> dataErrorsMap = new Dictionary<int, string>();

            // Column position -> rule and type used to validate that column; built from the header row.
            Dictionary<int, Tuple<QualityCheckColumnRule, QualityCheckColumnType>> validationIndex = new Dictionary<int, Tuple<QualityCheckColumnRule, QualityCheckColumnType>>();

            // NOTE(review): an empty file never enters this loop, so no header validation runs
            // and the result carries no errors - confirm that this is intended.
            string dataRow;
            while ((dataRow = reader.ReadLine()) != null)
            {
                List<string> elements = dataRow.Split(',').Select(e => e.Trim()).ToList();

                if (isHeaderRow)
                {
                    // Validate the header row and append any issues to the result.
                    List<string> headerIssues = GetHeaderIssues(elements, qualityCheck);
                    if (headerIssues.Count > 0)
                    {
                        errorResult.Errors = errorResult.Errors.Concat(headerIssues.AsEnumerable()).ToList();
                    }

                    // Cache the header-position lookup used to validate every data record.
                    validationIndex = BuildValidationIndex(elements, qualityCheck, qualityCheckTypes);
                    isHeaderRow = false;
                    continue;
                }

                Dictionary<int, string> dataIssues;
                try
                {
                    dataIssues = GetDataIssues(elements, validationIndex);
                }
                catch (IndexOutOfRangeException)
                {
                    // The record could not be validated; report the failure and stop reading.
                    errorResult.Errors.Add(Messages.QualityCheck_ExecutionFailure);
                    break;
                }
                catch (ArgumentOutOfRangeException)
                {
                    errorResult.Errors.Add(Messages.QualityCheck_ExecutionFailure);
                    break;
                }

                // Keep only the first error seen for each column; add in place instead of
                // rebuilding the whole dictionary for every record.
                foreach (KeyValuePair<int, string> issue in dataIssues)
                {
                    if (!dataErrorsMap.ContainsKey(issue.Key))
                    {
                        dataErrorsMap.Add(issue.Key, issue.Value);
                    }
                }

                // Once every configured column is in error, later records cannot add anything.
                if (dataErrorsMap.Count == qualityCheck.QualityCheckColumnRules.Count)
                {
                    break;
                }
            }

            // Data errors are appended after the header errors.
            errorResult.Errors = errorResult.Errors.Concat(dataErrorsMap.Values.AsEnumerable()).ToList();
            qualityCheckResults.Add(errorResult);
        }
    });

    return qualityCheckResults;
}