/// <summary>
/// Opens the workbook in <paramref name="stream"/> with NPOI and returns its first sheet.
/// </summary>
/// <param name="stream">Stream of the Excel document to open.</param>
/// <param name="med">Descriptor whose <c>FileName</c> extension selects the workbook type
/// (<c>.xls</c> uses <c>HSSFWorkbook</c>, anything else uses <c>XSSFWorkbook</c>).</param>
/// <param name="latestMed">Descriptor whose <c>Href</c> gets a failure marker appended and is logged
/// when the workbook cannot be opened.</param>
/// <returns>The sheet at index 0, or <c>null</c> when opening the workbook threw.</returns>
ISheet OpenWorkbookSheetWithNpoi(
    FileStream stream,
    HzzoMedsDownloadDto med,
    HzzoMedsDownloadDto latestMed)
{
    ISheet drugListSheet = null;
    try
    {
        // Ordinal, case-insensitive match: a file extension is an identifier, not linguistic text,
        // so the result must not depend on the current culture.
        if (med.FileName.EndsWith(".xls", StringComparison.OrdinalIgnoreCase))
        {
            var hssfWorkbook = new HSSFWorkbook(stream);
            drugListSheet = hssfWorkbook.GetSheetAt(0);
        }
        else
        {
            var xssfWorkbook = new XSSFWorkbook(stream);
            drugListSheet = xssfWorkbook.GetSheetAt(0);
        }
    }
    catch (Exception ex)
    {
        latestMed.Href += " - WORKSHEET COULD NOT BE PARSED";

        // The href is passed as an argument, not as the message template: braces inside a URL
        // would otherwise be interpreted as logging placeholders.
        _logger.LogError(ex, "{href}", latestMed.Href);
    }

    return drugListSheet;
}
/// <summary>
/// Converts list-item elements into download descriptors, one per usable item.
/// </summary>
/// <param name="elems">Elements that are each expected to contain an anchor with an <c>href</c>
/// and to end their text content with a dot-separated date token.</param>
/// <returns>
/// The descriptors whose <c>ValidFrom</c> is later than <c>filterDtParsable2013</c>. Items without an
/// anchor/href, items pointing at <c>cdn.hzzo.hr</c>, and items whose trailing token has fewer than
/// three dot-separated parts are skipped.
/// </returns>
ISet<HzzoMedsDownloadDto> ParseMedsLiElements(IEnumerable<IElement> elems)
{
    var medsList = new HashSet<HzzoMedsDownloadDto>();

    foreach (var li in elems)
    {
        // A list item without a link (or without an href) cannot be downloaded; skip it
        // instead of failing the whole run with a NullReferenceException.
        var href = li.QuerySelector("a")?.GetAttribute("href");
        if (href == null)
        {
            continue;
        }

        // NOTE: this domain is not available, links don't work
        if (href.Contains("cdn.hzzo.hr"))
        {
            continue;
        }

        // The last space-separated token is split on '.' and reassembled in reverse order
        // (presumably day.month.year -> year-month-day; verify against the page markup).
        var dtParts = li.TextContent.TrimEnd().Split(' ').LastOrDefault().Split('.');
        if (dtParts.Length < 3)
        {
            continue;
        }

        var downloadDto = new HzzoMedsDownloadDto(
            href,
            $"{dtParts[2]}-{dtParts[1]}-{dtParts[0]}",
            Path.Combine(_appPathsInfo.ApplicationRootPath, DOWNLOAD_DIR));

        // NOTE: docs from 2013 and older are messed up and can't be approached with this
        // generic parser; a more sophisticated parser would be needed for them.
        if (downloadDto.ValidFrom > filterDtParsable2013)
        {
            medsList.Add(downloadDto);
        }
    }

    return medsList;
}
/// <summary>
/// Copies the document stream held by <paramref name="doc"/> into a new file at <c>doc.FilePath</c>.
/// </summary>
/// <param name="doc">Descriptor providing <c>DocumentStream</c> (the pending source stream)
/// and <c>FilePath</c> (the destination).</param>
/// <returns>A task that completes only after the copy has finished and the file has been flushed.</returns>
Task SaveExcel(HzzoMedsDownloadDto doc) =>
    // Task.Run unwraps the async lambda, so the returned task represents the whole copy.
    // Task.Factory.StartNew with an async lambda returns a Task<Task> whose outer task
    // completes at the first await, i.e. before anything has been written.
    Task.Run(async () =>
    {
        // Await the pending stream instead of blocking on .Result, and dispose the stream itself
        // (disposing the task that produces it would leave the stream open).
        // The source is obtained first so a failed download does not leave an empty file behind.
        using (var sourceStream = await doc.DocumentStream)
        using (var fileStream = File.Create(doc.FilePath, BUFFER_SIZE, FileOptions.Asynchronous))
        {
            await CopyStream(sourceStream, fileStream);
            await fileStream.FlushAsync();
        }
    });
/// <summary>
/// Converts list-item elements into download descriptors, one per usable item.
/// </summary>
/// <param name="elems">Elements that are each expected to contain an anchor with an <c>href</c>;
/// the last space-separated token of their text content is handed to the descriptor.</param>
/// <returns>
/// The descriptors whose <c>ValidFrom</c> is later than <c>filterDtParsable2013</c>. Items without an
/// anchor/href and items pointing at <c>cdn.hzzo.hr</c> are skipped.
/// </returns>
static ISet<HzzoMedsDownloadDto> ParseMedsLiElements(IEnumerable<IElement> elems)
{
    var medsList = new HashSet<HzzoMedsDownloadDto>();

    foreach (var li in elems)
    {
        // A list item without a link (or without an href) cannot be downloaded; skip it
        // instead of failing the whole run with a NullReferenceException.
        var href = li.QuerySelector("a")?.GetAttribute("href");
        if (href == null)
        {
            continue;
        }

        // NOTE: this domain is not available, links don't work
        if (href.Contains("cdn.hzzo.hr"))
        {
            continue;
        }

        var downloadDto = new HzzoMedsDownloadDto(
            href,
            li.TextContent.TrimEnd().Split(' ').LastOrDefault(),
            DOWNLOAD_DIR);

        if (downloadDto.ValidFrom > filterDtParsable2013)
        {
            medsList.Add(downloadDto);
        }
    }

    return medsList;
}
/// <summary>
/// Reads each document's first worksheet row by row and appends one
/// <c>HzzoMedsImportDto</c> per data row to that document's <c>MedsList</c>.
/// </summary>
/// <param name="filteredMeds">Documents to parse. Only those with <c>ValidFrom.Year</c> below 2019 are processed.</param>
/// <param name="listType">Stamped on every imported row; <c>DrugListType.Supplementary</c> makes the
/// parser read eight additional price columns.</param>
/// <param name="isListStartingWith2014">When <c>true</c>, an extra column is read into <c>ApprovedBy</c>
/// right after the application type; otherwise <c>ApprovedBy</c> is <c>null</c> and no column is consumed.</param>
/// <exception cref="InvalidOperationException">
/// Wraps any exception raised while parsing; the message names the latest document, row and column.
/// </exception>
void ParseHzzoExcelDocuments(
    IEnumerable<HzzoMedsDownloadDto> filteredMeds,
    DrugListType listType,
    bool isListStartingWith2014)
{
    // Progress markers, reported in the wrapping exception if parsing fails.
    HzzoMedsDownloadDto latestMed = null;
    int latestRow = 0;
    int latestCol = 0;

    try
    {
        // HACK: due to a bug with the parser introduced in 2019, we are skipping year 2019
        foreach (var med in filteredMeds.Where(x => x.ValidFrom.Year < 2019))
        {
            latestMed = med;
            using (var stream = File.Open(med.FilePath, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
            {
                var drugListSheet = OpenWorkbookSheetWithNpoi(stream, med, latestMed);
                if (drugListSheet == null)
                {
                    // NOTE(review): this abandons ALL remaining documents, not just the unreadable one.
                    // Confirm whether `continue` was intended.
                    return;
                }

                var totalRows = drugListSheet.LastRowNum;
                int rowIndex = 1; // skips header row
                int colIndex = 0;

                // Column cursor: every reader below consumes exactly one cell and advances it,
                // so the order of the reads in the row loop IS the column layout.
                int incColNumber() => latestCol = colIndex++;

                string GetNextString() =>
                    drugListSheet.GetRow(rowIndex).GetCell(incColNumber()).ToString();

                // Null-tolerant variant, used for the first cell to detect missing/empty rows.
                (bool, string) TryGetNextString()
                {
                    var row = drugListSheet.GetRow(rowIndex);
                    var cIndex = incColNumber();
                    var cell = row?.GetCell(cIndex);
                    if (cell == null)
                    {
                        return (false, null);
                    }

                    return (true, cell.ToString());
                }

                // NOTE(review): parses with the current culture; confirm this matches the
                // number format the cells render to.
                decimal? GetNextDecimal() =>
                    decimal.TryParse(
                        drugListSheet.GetRow(rowIndex).GetCell(incColNumber()).ToString(),
                        out decimal dec)
                        ? dec
                        : new decimal?();

                // Strips slashes/backslashes so the cell text can be matched against the short enum names.
                string GetEnumStrVal() =>
                    drugListSheet.GetRow(rowIndex).GetCell(incColNumber())
                        .ToString().Replace(@"\", string.Empty).Replace("/", string.Empty);

                DrugApplicationType ParseNextDrugApplicationType()
                {
                    var strVal = GetEnumStrVal();
                    return string.IsNullOrWhiteSpace(strVal)
                        ? DrugApplicationType.Undefined
                        : EnumExtensions.Parse<DrugApplicationTypeShort, DrugApplicationType>(strVal);
                }

                DrugPrescriptionType ParseNextDrugPrescriptionType()
                {
                    var strVal = GetEnumStrVal();
                    return string.IsNullOrWhiteSpace(strVal)
                        ? DrugPrescriptionType.Unprescribed
                        : EnumExtensions.Parse<DrugPrescriptionTypeShort, DrugPrescriptionType>(strVal);
                }

                DrugApplicationTypeLimitation ParseNextDrugApplicationTypeLimitation()
                {
                    var strVal = GetEnumStrVal();
                    return string.IsNullOrWhiteSpace(strVal)
                        ? DrugApplicationTypeLimitation.Undefined
                        : EnumExtensions.Parse<DrugApplicationTypeLimitationShort, DrugApplicationTypeLimitation>(strVal);
                }

                for (; rowIndex <= totalRows; rowIndex++)
                {
                    latestCol = colIndex = 0;
                    latestRow = rowIndex;

                    var (hasRow, atkCode) = TryGetNextString();
                    if (!hasRow)
                    {
                        continue;
                    }

                    var importDto = new HzzoMedsImportDto();
                    importDto.RowId = rowIndex;
                    importDto.ListType = listType;
                    importDto.ValidFrom = med.ValidFrom;

                    importDto.AtkCode = atkCode;
                    importDto.ApplicationTypeLimitation = ParseNextDrugApplicationTypeLimitation();
                    importDto.GenericName = GetNextString();
                    importDto.UnitOfDistribution = GetNextString();
                    importDto.UnitOfDistributionPriceWithoutPDV = GetNextDecimal();
                    importDto.UnitOfDistributionPriceWithPDV = GetNextDecimal();
                    importDto.ApplicationType = ParseNextDrugApplicationType();
                    importDto.ApprovedBy = isListStartingWith2014 ? GetNextString() : null;
                    importDto.Manufacturer = GetNextString();
                    importDto.RegisteredName = GetNextString();
                    importDto.OriginalPackagingDescription = GetNextString();
                    importDto.OriginalPackagingSingleUnitPriceWithoutPdv = GetNextDecimal();
                    importDto.OriginalPackagingSingleUnitPriceWithPdv = GetNextDecimal();
                    importDto.OriginalPackagingPriceWithoutPdv = GetNextDecimal();
                    importDto.OriginalPackagingPriceWithPdv = GetNextDecimal();

                    // NOTE: supplementary prices
                    if (listType == DrugListType.Supplementary)
                    {
                        importDto.OriginalPackagingSingleUnitPricePaidByHzzoWithoutPdv = GetNextDecimal();
                        importDto.OriginalPackagingSingleUnitPricePaidByHzzoWithPdv = GetNextDecimal();
                        importDto.OriginalPackagingPricePaidByHzzoWithoutPdv = GetNextDecimal();
                        importDto.OriginalPackagingPricePaidByHzzoWithPdv = GetNextDecimal();
                        importDto.OriginalPackagingSingleUnitPriceExtraChargeWithoutPdv = GetNextDecimal();
                        importDto.OriginalPackagingSingleUnitPriceExtraChargeWithPdv = GetNextDecimal();
                        importDto.OriginalPackagingPriceExtraChargeWithoutPdv = GetNextDecimal();
                        importDto.OriginalPackagingPriceExtraChargeWithPdv = GetNextDecimal();
                    }

                    importDto.PrescriptionType = ParseNextDrugPrescriptionType();
                    importDto.IndicationsCode = GetNextString();
                    importDto.DirectionsCode = GetNextString();
                    importDto.DrugGroupCode = GetNextString();
                    importDto.DrugGroup = GetNextString();
                    importDto.DrugSubgroupCode = GetNextString();
                    importDto.DrugSubgroup = GetNextString();

                    med.MedsList.Add(importDto);
                }
            }

            med.MarkAsParsed();
            _logger.LogInformation(
                "Parsed document ({parsedCount}/{medsCount}): '{filename}'",
                ++_parsedCount, _medsCount, med.FileName);
        }
    }
    catch (Exception ex)
    {
        // latestMed is still null when the failure happens before the first document is reached
        // (e.g. while enumerating filteredMeds); dereferencing it here would replace the real
        // error with a NullReferenceException.
        var str = new StringBuilder()
            .Append(" latest med: ").AppendLine(latestMed?.FileName ?? "<none>")
            .Append(" latest row: ").Append(latestRow).AppendLine()
            .Append(" latest col: ").Append(latestCol);

        throw new InvalidOperationException(str.ToString(), ex);
    }
}
/// <summary>
/// Calls <c>_httpCli.GetStreamAsync</c> for <c>doc.Href</c> and stores the result, without awaiting it,
/// in <c>doc.DocumentStream</c>.
/// </summary>
/// <param name="doc">Descriptor supplying the address and receiving the pending stream.</param>
/// <returns>The same <paramref name="doc"/> instance, so calls can be chained.</returns>
HzzoMedsDownloadDto DownloadExcel(HzzoMedsDownloadDto doc)
{
    var pendingStream = _httpCli.GetStreamAsync(doc.Href);
    doc.DocumentStream = pendingStream;

    return doc;
}