/// <summary>
/// Synchronises the harvester's <c>DirectoryRecords</c> with <paramref name="sourceFiles"/>: a file
/// without a record gets one inserted, and an existing record whose stored file date is older than
/// the file's modified date is refreshed. All changes are submitted in one call at the end.
/// </summary>
/// <param name="logMessage">Sink for progress messages.</param>
/// <param name="sourceFiles">Files to record; matched against existing records by <c>Path</c>.</param>
/// <param name="sourceName">Name used to look up the repository row the records belong to.</param>
/// <param name="harvesterArgs">Arguments used to open the harvester repository.</param>
protected void UpdateHarvesterRecord(Action<string> logMessage, IEnumerable<DirectoryObjectMetadata> sourceFiles, string sourceName, RepositoryArgumentsBase harvesterArgs)
{
    using (IDatabaseRepository<IHarvesterDataContext> harvester = RepositoryFactory.CreateHarvesterRepository(harvesterArgs))
    {
        logMessage($"Connected to database '{harvester.Name}' ({harvester.ConnectionString})");

        // First() throws when no repository row carries the given name.
        Entities.Repository repository = harvester.DataContext.Repositories.First(r => r.Name == sourceName);

        if (OperationID == 0)
        {
            logMessage("Warning: OperationID was not set properly. Correcting this.");
            OperationID = harvester.DataContext.Operations.First(o => o.Name == Name).ID;
        }

        // Existing records for this operation/repository pair, keyed by file path.
        Dictionary<String, DirectoryRecord> knownRecords = harvester.DataContext.DirectoryRecords
            .Where(d => d.OperationID == OperationID && d.RepositoryID == repository.ID)
            .ToDictionary(d => d.FilePath);

        int insertedRecords = 0;
        foreach (DirectoryObjectMetadata file in sourceFiles)
        {
            if (knownRecords.TryGetValue(file.Path, out DirectoryRecord existing))
            {
                // Known file: only touch the record when the file is newer than what was stored.
                if (file.ModifiedDate > existing.FileModifiedDate)
                {
                    existing.FileModifiedDate = file.ModifiedDate;
                    existing.ModifiedDate = DateTime.Now;
                }

                continue;
            }

            harvester.DataContext.DirectoryRecords.InsertOnSubmit(new DirectoryRecord
            {
                OperationID = OperationID,
                RepositoryID = repository.ID,
                FilePath = file.Path,
                FileModifiedDate = file.ModifiedDate,
                CreationDate = DateTime.Now,
                ModifiedDate = DateTime.Now
            });
            insertedRecords++;
        }

        harvester.DataContext.SubmitChanges();
        logMessage($"Inserted {insertedRecords} successful files into DirectoryRecords");
    }
}
/// <summary>
/// Unpacks the zip archives found in the source directory, records new or changed source files in
/// the harvester database, then parses those files as EZProxy audit records and bulk-imports them
/// into the statistics database.
/// </summary>
/// <param name="runDate">Not used by this implementation.</param>
/// <param name="logMessage">Sink for progress messages.</param>
/// <param name="cancellationToken">Checked once, after the file comparison and before the harvester changes are submitted.</param>
/// <exception cref="OperationCanceledException">Cancellation was requested before the harvester changes were submitted.</exception>
public override void Execute(DateTime runDate, Action<string> logMessage, System.Threading.CancellationToken cancellationToken)
{
    using (IDirectoryRepository source = RepositoryFactory.CreateDirectoryRepository(_directoryArgs))
    {
        logMessage($"Connected to source repository '{source.Name}' ({source.ConnectionString})");

        using (IDirectoryRepository destination = RepositoryFactory.CreateDirectoryRepository(_logDirectoryArgs))
        {
            logMessage($"Connected to destination repository '{destination.Name}' ({destination.ConnectionString})");
            Regex filePattern = new Regex(_arguments.FilePattern);

            // Snapshot the listing: the loop body deletes, moves and creates entries in this same repository.
            List<DirectoryObjectMetadata> archives = source.ListFiles().Where(x => x.Path.Contains(".zip")).ToList();
            foreach (DirectoryObjectMetadata file in archives)
            {
                string tempZipDirectoryPath = file.Path.Replace(".zip", "");
                string tempZipDirectoryName = file.Name.Replace(".zip", "");
                ZipFile.ExtractToDirectory(file.Path, tempZipDirectoryPath);
                source.DeleteFile(file.Path);

                // Move extracted files that match the configured pattern up into the source root,
                // unless a file of that name is already there. Snapshot first, since moving files
                // changes the directory being listed.
                List<String> extractedMatches = source.ListFiles(tempZipDirectoryName)
                    .Select(x => x.Path)
                    .Where(x => filePattern.IsMatch(x))
                    .ToList();
                foreach (String unzippedfile in extractedMatches)
                {
                    string filename = unzippedfile.Split(new[] { "\\" }, StringSplitOptions.None).Last();
                    bool alreadyInSource = source.ListFiles().Any(x => x.Name == filename);
                    if (!alreadyInSource)
                    {
                        source.MoveFile(unzippedfile, filename);
                    }
                }

                // Every .gz entry of an archive targets the same "<archive name>.log" file in the
                // destination; it is only written when no file of that name exists there yet.
                string fileNameConcat = tempZipDirectoryName + ".log";
                foreach (String gzipFile in source.ListFiles(tempZipDirectoryName).Select(x => x.Path).Where(x => x.Contains(".gz")))
                {
                    bool alreadyInDestination = destination.ListFiles().Any(x => x.Name == fileNameConcat);
                    if (alreadyInDestination)
                    {
                        continue;
                    }

                    using (GZipStream gzipStream = new GZipStream(source.OpenFile(gzipFile), CompressionMode.Decompress))
                    using (Stream unzippedDestination = destination.CreateFile(fileNameConcat, Repository.Directory.FileCreationMode.ThrowIfFileExists))
                    {
                        gzipStream.CopyTo(unzippedDestination);
                    }
                }

                source.DeleteDirectory(tempZipDirectoryPath);
            }
        }

        List<String> modified = new List<String>();
        Int32 newCount = 0;

        using (IDatabaseRepository<IHarvesterDataContext> harvester = RepositoryFactory.CreateHarvesterRepository(_harvesterArgs))
        {
            logMessage($"Connected to database '{harvester.Name}' ({harvester.ConnectionString})");

            IEnumerable<DirectoryObjectMetadata> sourceFiles = source.ListFiles("/");
            Dictionary<String, DirectoryRecord> dictionary = harvester.DataContext.DirectoryRecords
                .Where(d => d.Operation.Name == Name && d.Repository.Name == source.Name)
                .ToDictionary(d => d.FilePath);
            Entities.Repository repository = harvester.DataContext.Repositories.First(x => x.Name == source.Name);

            if (OperationID == 0)
            {
                logMessage("Warning: OperationID was not set properly. Correcting this.");
                OperationID = harvester.DataContext.Operations.First(d => d.Name == Name).ID;
            }

            foreach (DirectoryObjectMetadata file in sourceFiles)
            {
                if (!dictionary.TryGetValue(file.Path, out DirectoryRecord element))
                {
                    modified.Add(file.Name);
                    newCount++;
                    harvester.DataContext.DirectoryRecords.InsertOnSubmit(new DirectoryRecord
                    {
                        OperationID = OperationID,
                        RepositoryID = repository.ID,
                        FilePath = file.Path,
                        FileModifiedDate = file.ModifiedDate,
                        CreationDate = DateTime.Now,
                        ModifiedDate = DateTime.Now
                    });
                }
                else if (file.ModifiedDate > element.FileModifiedDate)
                {
                    modified.Add(file.Name);
                    element.FileModifiedDate = file.ModifiedDate;
                    element.ModifiedDate = DateTime.Now;
                }
            }

            // The enclosing using blocks dispose both repositories while the exception unwinds,
            // so no manual Dispose calls are needed (they would dispose each object twice).
            cancellationToken.ThrowIfCancellationRequested();

            // NOTE(review): the files are recorded here, before the statistics import below has run;
            // a failed import would leave them recorded as already seen. Confirm this is intended.
            harvester.DataContext.SubmitChanges();
        }

        logMessage($"Discovered {modified.Count} files to be processed ({newCount} new and {modified.Count - newCount} updated).");
        if (modified.Count == 0)
        {
            return;
        }

        using (IDatabaseRepository<IStatisticsDataContext> destination = RepositoryFactory.CreateStatisticsRepository(_statisticsArgs))
        {
            logMessage($"Connected to database '{destination.Name}' ({destination.ConnectionString})");

            StreamParser<EZProxyAudit> parser = new StreamParser<EZProxyAudit>();
            List<EZProxyAudit> records = new List<EZProxyAudit>();

            foreach (string file in modified)
            {
                logMessage($"Processing '{file}':");
                int lineNumber = 0;

                // Consume the parser output while the stream is still open, then release the stream.
                using (Stream input = source.OpenFile(file))
                {
                    records.AddRange(parser.ParseStream(input).Select(x => new EZProxyAudit
                    {
                        DateTime = x.DateTime,
                        Event = x.Event,
                        IP = x.IP,
                        Other = x.Other,
                        Session = x.Session,
                        Username = x.Username,
                        LineNumber = lineNumber++,
                    }));
                }
            }

            logMessage($"Records Found: {records.Count}");
            destination.DataContext.BulkImportEZProxyAudit(
                records.ToDataReader(r => new object[] { r.DateTime, r.Event, r.IP, r.Username, r.Session, r.Other, r.LineNumber }));
        }
    }
}
/// <summary>
/// Extracts zipped WMS inventory exports that have no extracted counterpart yet, determines which
/// export files are new or changed according to the harvester's directory records, imports each of
/// those files into the statistics database, and records only the files that imported cleanly.
/// </summary>
/// <param name="runDate">Not used by this implementation; each file's run date comes from <c>GetFileDate</c>.</param>
/// <param name="logMessage">Sink for progress messages.</param>
/// <param name="cancellationToken">Checked before the import starts and again after each file is parsed.</param>
/// <exception cref="OperationCanceledException">Cancellation was requested before the import started.</exception>
/// <exception cref="InvalidDataException">A file's header matched none of the known record layouts.</exception>
/// <exception cref="Exception">At least one file could not be imported, or the import was cancelled part-way.</exception>
public override void Execute(DateTime runDate, Action<string> logMessage, CancellationToken cancellationToken)
{
    using (IDirectoryRepository source = RepositoryFactory.CreateDirectoryRepository(_directoryArgs))
    {
        logMessage($"Connected to source repository '{source.Name}' ({source.ConnectionString})");

        Regex filePattern = new Regex(@"^((ITU[.]Item_inventories[.]\d{8})|(ITU[.]Circulation_Item_Inventories[.]\d{8}))[.]txt([.]zip)?$", RegexOptions.IgnoreCase);

        var sourceFilesBeforeZip = source.ListFiles().Where(y => filePattern.IsMatch(y.Name)).ToArray();
        foreach (DirectoryObjectMetadata zipFile in sourceFilesBeforeZip.Where(x => x.Path.Contains(".txt.zip")))
        {
            // Only extract when the listing does not already contain the extracted .txt file.
            if (sourceFilesBeforeZip.All(x => x.Name != zipFile.Name.Replace(".zip", "")))
            {
                ZipFile.ExtractToDirectory(zipFile.Path, Path.GetDirectoryName(zipFile.Path.Replace(".txt.zip", "")));
            }

            // The archive is left in place; deleting it is currently disabled.
            //source.DeleteFile(zipFile.Path);
        }

        List<String> modified = new List<String>();
        Int32 newCount = 0;

        // The pattern is case-insensitive, so the archive exclusion has to be as well; otherwise a
        // ".ZIP" file would be handed to the text parsers.
        List<DirectoryObjectMetadata> sourceFiles = source.ListFiles("/")
            .Where(y => filePattern.IsMatch(y.Name) && !y.Name.EndsWith(".zip", StringComparison.OrdinalIgnoreCase))
            .ToList();

        using (IDatabaseRepository<IHarvesterDataContext> harvester = RepositoryFactory.CreateHarvesterRepository(_harvesterArgs))
        {
            logMessage($"Connected to database '{harvester.Name}' ({harvester.ConnectionString})");

            Entities.Repository repository = harvester.DataContext.Repositories.First(y => y.Name == source.Name);

            if (OperationID == 0)
            {
                logMessage("Warning: OperationID was not set properly. Correcting this.");
                OperationID = harvester.DataContext.Operations.First(d => d.Name == Name).ID;
            }

            Dictionary<String, DirectoryRecord> dictionary = harvester.DataContext.DirectoryRecords
                .Where(d => d.OperationID == OperationID && d.RepositoryID == repository.ID)
                .ToDictionary(d => d.FilePath);

            foreach (DirectoryObjectMetadata file in sourceFiles)
            {
                if (!dictionary.TryGetValue(file.Path, out DirectoryRecord element))
                {
                    modified.Add(file.Name);
                    newCount++;
                }
                else if (file.ModifiedDate > element.FileModifiedDate)
                {
                    modified.Add(file.Name);
                }
            }
        }

        if (modified.Count == 0 && newCount == 0)
        {
            logMessage("No Records to be processed.");
            return;
        }

        logMessage($"Discovered {modified.Count} files to be processed ({newCount} new and {modified.Count - newCount} updated).");

        // The enclosing using blocks dispose the repositories while the exception unwinds, so no
        // manual Dispose call is needed here or below (it would dispose the source twice).
        cancellationToken.ThrowIfCancellationRequested();

        using (IDatabaseRepository<IStatisticsDataContext> destination = RepositoryFactory.CreateStatisticsRepository(_databaseArgs))
        {
            logMessage($"Connected to destination database '{destination.Name}' ({destination.ConnectionString})");

            bool exceptionHandled = false;
            List<DirectoryObjectMetadata> successfulFiles = new List<DirectoryObjectMetadata>();

            StreamParser<WmsInventoryRecord> parser = new StreamParser<WmsInventoryRecord>();
            StreamParser<WmsInventoryRecordEdge> parserEdge = new StreamParser<WmsInventoryRecordEdge>();
            StreamParser<WmsInventoryRecordDiff> parserDiff = new StreamParser<WmsInventoryRecordDiff>();
            StreamParser<WmsInventoryRecord2018> parser2018 = new StreamParser<WmsInventoryRecord2018>();

            cancellationToken.ThrowIfCancellationRequested();

            foreach (string file in modified)
            {
                logMessage($"Processing '{file}':");

                InventoryRecord[] parsed;

                // Parse and materialise while the stream is open so it can be released before the import.
                using (Stream inputStream = source.OpenFile(file))
                {
                    DateTime fileRunDate = GetFileDate(file);
                    string headers = ExtractHeader(inputStream);
                    //logMessage(headers);

                    // Pick the parser whose layout matches the header line.
                    IEnumerable<IWmsInventoryRecord> wmsRecords;
                    if (IsWmsInventoryRecord(headers))
                    {
                        wmsRecords = parser.ParseStream(inputStream);
                    }
                    else if (IsWmsInventoryRecord2018(headers))
                    {
                        wmsRecords = parser2018.ParseStream(inputStream);
                    }
                    else if (IsWmsInventoryRecordDiff(headers))
                    {
                        wmsRecords = parserDiff.ParseStream(inputStream);
                    }
                    else if (IsWmsInventoryRecordEdge(headers))
                    {
                        wmsRecords = parserEdge.ParseStream(inputStream);
                    }
                    else
                    {
                        throw new InvalidDataException($"Header format not recognized: '{headers}'");
                    }

                    parsed = wmsRecords
                        .Select(wms => new InventoryRecord
                        {
                            OclcNumber = wms.OclcNumber,
                            Title = ParseTitle(wms.Title),
                            Author = wms.Author,
                            MaterialFormat = wms.MaterialFormat.ToString(),
                            Barcode = wms.Barcode,
                            Cost = wms.Cost,
                            LastInventoriedDate = wms.LastInventoriedDate,
                            DeletedDate = wms.ItemDeletedDate,
                            ItemType = wms.ItemType.ToString(),
                            CallNumber = wms.CallNumber,
                            ShelvingLocation = wms.ShelvingLocation?.ToString(),
                            CurrentStatus = wms.CurrentStatus?.ToString(),
                            Description = wms.Description,
                            RunDate = fileRunDate,
                            Anomalous = AnomalousBarcode(wms.Barcode),
                        })
                        .Where(y => y.Title != null || y.Barcode != null)
                        // Keep one record per (OCLC number, barcode, run date).
                        .GroupBy(x => new { x.OclcNumber, x.Barcode, x.RunDate })
                        .Select(x => x.First())
                        .ToArray();
                }

                logMessage($"Records Found: {parsed.Length}");

                if (cancellationToken.IsCancellationRequested)
                {
                    // Stop here instead of opening and parsing every remaining file.
                    logMessage("Operation was cancelled");
                    exceptionHandled = true;
                    break;
                }

                if (parsed.Length <= 0)
                {
                    logMessage("Failed to parse properly and return any meaningful records. This might mean that non of the parsed records had a Title or Barcode.");
                    exceptionHandled = true;
                    continue;
                }

                try
                {
                    destination.DataContext.BulkImportInventory(
                        parsed.ToDataReader(r => new object[] { r.OclcNumber, r.Title, r.MaterialFormat, r.Author, r.Barcode, r.Cost, r.LastInventoriedDate, r.DeletedDate, r.ItemType, r.CallNumber, r.ShelvingLocation, r.CurrentStatus, r.Description, r.RunDate, r.Anomalous }));
                }
                catch (SqlException ex)
                {
                    logMessage(ex.Message);
                    if (ex.InnerException != null)
                    {
                        logMessage(ex.InnerException.ToString());
                    }

                    logMessage("Sql Server was most likely put into an unusable state after this exception and thus the whole operation was canceled.");
                    exceptionHandled = true;

                    // The failed file must not be recorded as processed, and no further imports are
                    // attempted, which is what the message above announces.
                    break;
                }

                successfulFiles.Add(sourceFiles.First(x => x.Name == file));
            }

            // Only files that imported cleanly are recorded, so failed ones are picked up again next run.
            UpdateHarvesterRecord(logMessage, successfulFiles, source.Name, _harvesterArgs);

            if (exceptionHandled)
            {
                destination.DataContext.Connection.Close();
                destination.DataContext.Dispose();
                throw new Exception("An Exception was encountered. At least one file failed");
            }
        }
    }
}
/// <inheritdoc />
/// <remarks>
/// Requests the records of every available report for <paramref name="runDate"/>, optionally writes
/// each report's records to a JSON file, makes sure a vendor record (per run date) and a database
/// record exist, de-duplicates the records, bulk-imports them grouped by run date, and stamps a
/// <c>CounterOperationRecord</c> for each report in the harvester database.
/// </remarks>
public override void Execute(DateTime runDate, Action<string> logMessage, CancellationToken cancellationToken)
{
    using (IDatabaseRepository<IStatisticsDataContext> destination = RepositoryFactory.CreateStatisticsRepository(statisticsArguments))
    {
        destination.DataContext.LogMessage = logMessage;
        logMessage($"Connected to destination database '{destination.Name}' ({destination.ConnectionString})");

        using (ICounterRepository source = RepositoryFactory.CreateCounterRepository(counterRepoArguments))
        {
            source.LogMessage = logMessage;
            logMessage($"Connected to source repository '{source.Name}' ({source.ConnectionString})");

            Dictionary<CounterReport, List<CounterRecord>> preRecordsReports = source.AvailableReports
                .ToDictionary(report => report, report => source.RequestRecords(runDate, report).ToList());

            if (!preRecordsReports.Any(x => x.Value.Count > 0))
            {
                logMessage("No Records for this date");
                return;
            }

            // Optional raw dump: one JSON file per report.
            if (localJsonArguments != null)
            {
                foreach (var reportRecord in preRecordsReports)
                {
                    using (IDirectoryRepository directoryRepo = RepositoryFactory.CreateDirectoryRepository(localJsonArguments))
                    using (Stream stream = directoryRepo.CreateFile($"{source.Name} {runDate:yyyy-MM-dd} {reportRecord.Key}.json", FileCreationMode.Overwrite))
                    {
                        JsonSerializer serializer = JsonSerializer.Create();
                        using (JsonTextWriter jsonTextWriter = new JsonTextWriter(new StreamWriter(stream)))
                        {
                            serializer.Serialize(jsonTextWriter, reportRecord.Value);
                        }
                    }
                }
            }

            var preRecords = preRecordsReports.SelectMany(x => x.Value).ToList();

            //Add Metrics of vendorRecords to single vendor record
            // The groups are snapshotted with ToList() because the loop body appends to preRecords.
            foreach (var recordsByRunDate in preRecords.GroupBy(x => x.RunDate).ToList())
            {
                if (recordsByRunDate.Any(x => x.ItemType == ItemType.Vendor))
                {
                    foreach (var vendorRecord in recordsByRunDate.Where(x => x.ItemType == ItemType.Vendor))
                    {
                        vendorRecord.Identifiers = new[] { new CounterIdentifier { IdentifierType = IdentifierType.Proprietary, IdentifierValue = source.Name } };
                    }
                }
                else
                {
                    // No vendor record for this run date: add an empty one named after the source.
                    preRecords.Add(new CounterRecord
                    {
                        ItemName = source.Name,
                        ItemType = ItemType.Vendor,
                        RunDate = recordsByRunDate.Key,
                        Identifiers = new[] { new CounterIdentifier { IdentifierType = IdentifierType.Proprietary, IdentifierValue = source.Name } },
                        Metrics = new CounterMetric[] { }
                    });
                }
            }

            if (preRecords.Any(x => x.ItemType == ItemType.Database))
            {
                foreach (CounterRecord record in preRecords)
                {
                    if (record.ItemType != ItemType.Database)
                    {
                        continue;
                    }

                    record.ItemPlatform = source.Name;
                    record.Identifiers = new[] { new CounterIdentifier { IdentifierType = IdentifierType.Database, IdentifierValue = record.ItemName } };
                }
            }
            else
            {
                // No database record at all: add an empty one named after the source.
                preRecords.Add(new CounterRecord
                {
                    ItemName = source.Name,
                    ItemType = ItemType.Database,
                    ItemPlatform = source.Name,
                    RunDate = runDate,
                    Identifiers = new[] { new CounterIdentifier { IdentifierType = IdentifierType.Database, IdentifierValue = source.Name } },
                    Metrics = new CounterMetric[] { }
                });
            }

            // Index is assigned after de-duplication and links each record to its identifier and
            // metric rows in the bulk import below.
            var records = preRecords
                .Where(x => !string.IsNullOrEmpty(x.ItemName))
                .Select(SanitizeIdentifiers)
                .GroupBy(x => x, new CounterRecordComparer())
                .Select(AggregateDuplicates)
                .Select((r, i) => new { Index = i, Record = r });

            var counterRecords = records.GroupBy(x => x.Record.RunDate).ToList();

            // NOTE(review): the "+ 1" is kept from the original; it is unclear which extra record it accounts for.
            logMessage($"{preRecords.Count + 1} Total Records");
            logMessage($"{recordswithNoIdentifiers} Records with no Identifiers");
            logMessage($"{counterRecords.Sum(x => x.Count())} Unique Records");

            if (cancellationToken.IsCancellationRequested)
            {
                // The enclosing using blocks dispose source and destination while the exception
                // unwinds; only the data context is released explicitly here.
                destination.DataContext.Connection.Close();
                destination.DataContext.Dispose();
                cancellationToken.ThrowIfCancellationRequested();
            }

            foreach (var counterGroup in counterRecords)
            {
                destination.DataContext.BulkImportCounterTransactions(
                    counterGroup
                        .ToDataReader(r => new object[] { null, null, null, r.Index, r.Record.ItemName, r.Record.ItemPlatform, r.Record.ItemType, r.Record.RunDate }),
                    counterGroup
                        .SelectMany(r => r.Record.Identifiers.Bind(a => a.Select(i => new { r.Index, Identifier = i, r.Record.ItemType })))
                        .ToDataReader(i => new object[] { i.Index, i.Identifier.IdentifierType, i.Identifier.IdentifierValue, i.ItemType }),
                    counterGroup
                        .SelectMany(r => r.Record.Metrics.Bind(a => a.Select(m => new { r.Index, Metric = m })))
                        .ToDataReader(m => new object[] { m.Index, m.Metric.MetricType, m.Metric.MetricValue }));

                //Add Record of report being run
                using (var harvester = RepositoryFactory.CreateHarvesterRepository(harvesterArguments))
                {
                    if (OperationID == 0)
                    {
                        logMessage("Warning: OperationID was not set properly. Correcting this.");
                        OperationID = harvester.DataContext.Operations.First(d => d.Name == Name).ID;
                    }

                    Entities.Repository repository = harvester.DataContext.Repositories.First(x => x.Name == source.Name);

                    foreach (CounterReport report in preRecordsReports.Keys)
                    {
                        string reportName = report.ToString();

                        // One lookup decides between inserting a new row and refreshing the existing one.
                        var existing = harvester.DataContext.CounterOperationRecords
                            .FirstOrDefault(x => x.OperationID == OperationID && x.RunDate == runDate && x.Report == reportName);

                        if (existing == null)
                        {
                            harvester.DataContext.CounterOperationRecords.InsertOnSubmit(new CounterOperationRecord
                            {
                                OperationID = OperationID,
                                RepositoryID = repository.ID,
                                RunDate = runDate,
                                Report = reportName,
                                ExecutedDate = DateTime.Now
                            });
                        }
                        else
                        {
                            existing.ExecutedDate = DateTime.Now;
                        }
                    }

                    harvester.DataContext.SubmitChanges();
                }
            }
        }
    }
}
/// <summary>
/// Finds the Statista spreadsheets in the source directory that are new or changed according to the
/// harvester's directory records, reads every data row of their worksheet into
/// <c>StatistaRecord</c>s, bulk-imports the records and then updates the directory records.
/// </summary>
/// <param name="runDate">Not used by this implementation.</param>
/// <param name="logMessage">Sink for progress messages.</param>
/// <param name="cancellationToken">Checked once, after the file comparison and before any spreadsheet is opened.</param>
/// <exception cref="OperationCanceledException">Cancellation was requested before processing started.</exception>
/// <exception cref="KeyNotFoundException">A spreadsheet lacks one of the "date", "title" or "type of access" columns.</exception>
public override void Execute(DateTime runDate, Action<string> logMessage, CancellationToken cancellationToken)
{
    using (IDirectoryRepository source = RepositoryFactory.CreateDirectoryRepository(_directoryArgs))
    {
        logMessage($"Connected to source repository '{source.Name}' ({source.ConnectionString})");

        List<string> modified = new List<string>();
        int newCount = 0;

        using (IDatabaseRepository<IHarvesterDataContext> harvester = RepositoryFactory.CreateHarvesterRepository(_harvesterArgs))
        {
            logMessage($"Connected to database '{harvester.Name}' ({harvester.ConnectionString})");

            Regex filePattern = new Regex(_arguments.FilePattern, RegexOptions.IgnoreCase);
            IEnumerable<DirectoryObjectMetadata> sourceFiles = source.ListFiles().Where(x => filePattern.IsMatch(x.Name)).ToArray();

            Dictionary<string, DirectoryRecord> dictionary = harvester.DataContext.DirectoryRecords
                .Where(d => d.Operation.Name == Name && d.Repository.Name == source.Name)
                .ToDictionary(d => d.FilePath);

            // The result is not used below, but First() throws early when the repository row is missing.
            Entities.Repository repository = harvester.DataContext.Repositories.First(x => x.Name == source.Name);

            if (OperationID == 0)
            {
                logMessage("Warning: OperationID was not set properly. Correcting this.");
                OperationID = harvester.DataContext.Operations.First(d => d.Name == Name).ID;
            }

            foreach (DirectoryObjectMetadata file in sourceFiles)
            {
                if (!dictionary.TryGetValue(file.Path, out DirectoryRecord element))
                {
                    modified.Add(file.Path);
                    newCount++;
                }
                else if (file.ModifiedDate > element.FileModifiedDate)
                {
                    modified.Add(file.Path);
                }
            }

            // The enclosing using blocks dispose both repositories while the exception unwinds,
            // so no manual Dispose calls are needed (they would dispose each object twice).
            cancellationToken.ThrowIfCancellationRequested();

            logMessage($"Discovered {modified.Count} files to be processed ({newCount} new and {modified.Count - newCount} updated).");
            if (modified.Count == 0)
            {
                return;
            }

            List<StatistaRecord> records = new List<StatistaRecord>();

            foreach (string file in modified)
            {
                logMessage($"Processing Statista file: {file}");

                // Release the workbook as soon as its rows have been read.
                using (ExcelPackage package = new ExcelPackage(new FileInfo(file)))
                {
                    ExcelWorksheet worksheet = package.Workbook.Worksheets[1];
                    ExcelRange cells = worksheet.Cells;
                    int columnCount = worksheet.Dimension.Columns;
                    int rowCount = worksheet.Dimension.Rows;

                    // Row 1 holds the column headers. Map each lower-cased header to its zero-based
                    // position in a row. Invariant casing keeps the literal keys used below
                    // ("id", "main industry", ...) matching under every culture.
                    Dictionary<string, int> fieldMap = new Dictionary<string, int>();
                    for (int column = 1; column <= columnCount; column++)
                    {
                        fieldMap.Add(cells[1, column].Value.ToString().ToLowerInvariant(), column - 1);
                    }

                    int[] necessaryIndices = new[] { fieldMap["date"], fieldMap["title"], fieldMap["type of access"] };

                    // Both counters are per file so the progress line below reports this file only.
                    int fileRecords = 0;
                    int recordsSkipped = 0;

                    for (int row = 2; row <= rowCount; row++)
                    {
                        List<string> line = new List<string>(columnCount);
                        for (int column = 1; column <= columnCount; column++)
                        {
                            line.Add(cells[row, column].Text);
                        }

                        // Skip rows that are entirely empty or that lack one of the required fields.
                        if (line.All(EmptyField) || necessaryIndices.Any(index => EmptyField(line[index])))
                        {
                            recordsSkipped++;
                            continue;
                        }

                        // Empty fields are stored as null.
                        for (int i = 0; i < line.Count; i++)
                        {
                            if (EmptyField(line[i]))
                            {
                                line[i] = null;
                            }
                        }

                        records.Add(new StatistaRecord
                        {
                            Date = ParseDate(line[fieldMap["date"]]),
                            Title = line[fieldMap["title"]],
                            TypeofAccess = line[fieldMap["type of access"]],
                            ID = fieldMap.ContainsKey("id") ? line[fieldMap["id"]] : null,
                            ContentType = fieldMap.ContainsKey("content type") ? line[fieldMap["content type"]] : null,
                            MainIndustry = fieldMap.ContainsKey("main industry") ? line[fieldMap["main industry"]] : null,
                            Content = fieldMap.ContainsKey("content") ? line[fieldMap["content"]] : null,
                            Subtype = fieldMap.ContainsKey("subtyp") ? line[fieldMap["subtyp"]] : null,
                        });
                        fileRecords++;
                    }

                    logMessage($"\t{fileRecords}/{fileRecords + recordsSkipped} records processed.");
                }
            }

            using (IDatabaseRepository<IStatisticsDataContext> destination = RepositoryFactory.CreateStatisticsRepository(_statisticsArgs))
            {
                destination.DataContext.BulkImportStatista(
                    records.ToDataReader(r => new object[] { r.ID, r.Date, r.ContentType, r.MainIndustry, r.Title, r.TypeofAccess, r.Content, r.Subtype }));
            }

            UpdateHarvesterRecord(logMessage, sourceFiles, source.Name, _harvesterArgs);
        }
    }
}
/// <summary>
/// Passes <paramref name="repository"/> on to the underlying context's <c>Add</c>.
/// </summary>
/// <param name="repository">The repository entity to add.</param>
public void Add(Entities.Repository repository) => _context.Add(repository);