Ejemplo n.º 1
0
        /// <summary>
        /// Inserts or refreshes a DirectoryRecord in the harvester database for each supplied file,
        /// keyed by file path within the current operation and the named repository.
        /// </summary>
        /// <param name="logMessage">Callback that receives progress messages.</param>
        /// <param name="sourceFiles">Files to record; enumerated exactly once.</param>
        /// <param name="sourceName">Name of the repository row the records are attached to.</param>
        /// <param name="harvesterArgs">Arguments used to open the harvester repository.</param>
        /// <exception cref="InvalidOperationException">
        /// Thrown by <c>First</c> when no repository named <paramref name="sourceName"/> exists, or
        /// when <c>OperationID</c> is 0 and no operation with this operation's <c>Name</c> exists.
        /// </exception>
        protected void UpdateHarvesterRecord(Action <string> logMessage, IEnumerable <DirectoryObjectMetadata> sourceFiles, string sourceName, RepositoryArgumentsBase harvesterArgs)
        {
            using (IDatabaseRepository <IHarvesterDataContext> harvester = RepositoryFactory.CreateHarvesterRepository(harvesterArgs))
            {
                logMessage($"Connected to database '{harvester.Name}' ({harvester.ConnectionString})");

                int insertedRecords = 0;

                Entities.Repository repository = harvester.DataContext.Repositories.First(y => y.Name == sourceName);

                if (OperationID == 0)
                {
                    logMessage("Warning: OperationID was not set properly. Correcting this.");
                    OperationID = harvester.DataContext.Operations.First(d => d.Name == Name).ID;
                }

                // Existing records for this operation/repository, looked up by file path.
                Dictionary <String, DirectoryRecord> dictionary = harvester.DataContext.DirectoryRecords.Where(d => d.OperationID == OperationID && d.RepositoryID == repository.ID).ToDictionary(d => d.FilePath);

                foreach (DirectoryObjectMetadata file in sourceFiles)
                {
                    DirectoryRecord element;

                    if (!dictionary.TryGetValue(file.Path, out element))
                    {
                        // Sample the clock once so CreationDate and ModifiedDate are identical.
                        DateTime now = DateTime.Now;

                        element = new DirectoryRecord
                        {
                            OperationID      = OperationID,
                            RepositoryID     = repository.ID,
                            FilePath         = file.Path,
                            FileModifiedDate = file.ModifiedDate,
                            CreationDate     = now,
                            ModifiedDate     = now
                        };

                        harvester.DataContext.DirectoryRecords.InsertOnSubmit(element);

                        // Remember the pending record: a path repeated in sourceFiles must not queue a
                        // second insert, which would make the ToDictionary call above throw on a later run.
                        dictionary.Add(file.Path, element);
                        insertedRecords++;
                    }
                    else if (file.ModifiedDate > element.FileModifiedDate)
                    {
                        element.FileModifiedDate = file.ModifiedDate;
                        element.ModifiedDate     = DateTime.Now;
                    }
                }

                harvester.DataContext.SubmitChanges();
                logMessage($"Inserted {insertedRecords} successful files into DirectoryRecords");
            }
        }
        /// <summary>
        /// Unpacks zip archives found in the source directory, records new or changed source files
        /// in the harvester database, then parses those files and bulk-imports the result as
        /// EZProxy audit records into the statistics database.
        /// </summary>
        /// <param name="runDate">Run date supplied by the caller; not read by this method.</param>
        /// <param name="logMessage">Callback that receives progress messages.</param>
        /// <param name="cancellationToken">
        /// Checked once, after source files have been compared with the harvester records and
        /// before those records are submitted.
        /// </param>
        /// <exception cref="OperationCanceledException">Cancellation was requested at the check point.</exception>
        public override void Execute(DateTime runDate, Action <string> logMessage, System.Threading.CancellationToken cancellationToken)
        {
            using (IDirectoryRepository source = RepositoryFactory.CreateDirectoryRepository(_directoryArgs))
            {
                logMessage($"Connected to source repository '{source.Name}' ({source.ConnectionString})");

                using (IDirectoryRepository destination = RepositoryFactory.CreateDirectoryRepository(_logDirectoryArgs))
                {
                    logMessage($"Connected to destination repository '{destination.Name}' ({destination.ConnectionString})");

                    Regex filePattern = new Regex(_arguments.FilePattern);

                    foreach (DirectoryObjectMetadata file in source.ListFiles().Where(x => x.Path.Contains(".zip")))
                    {
                        // The archive is extracted next to itself into a directory named after the
                        // archive without ".zip", and the archive is then deleted.
                        string tempZipDirectoryPath = file.Path.Replace(".zip", "");
                        string tempZipDirectoryName = file.Name.Replace(".zip", "");
                        ZipFile.ExtractToDirectory(file.Path, tempZipDirectoryPath);
                        source.DeleteFile(file.Path);

                        // Move extracted files matching the configured pattern into the source root,
                        // unless a file with the same name is already listed there.
                        foreach (String unzippedfile in source.ListFiles(tempZipDirectoryName).Select(x => x.Path).Where(x => filePattern.IsMatch(x)))
                        {
                            string        filename     = unzippedfile.Split(new[] { "\\" }, StringSplitOptions.None).Last();
                            List <String> currentFiles = source.ListFiles().Select(x => x.Name).ToList();
                            if (!currentFiles.Contains(filename))
                            {
                                source.MoveFile(unzippedfile, filename);
                            }
                        }

                        // Decompress extracted .gz files into the destination as "<archive name>.log",
                        // skipping the write when that file is already listed there.
                        foreach (String gzipFile in source.ListFiles(tempZipDirectoryName).Select(x => x.Path).Where(x => x.Contains(".gz")))
                        {
                            string        fileNameConcat = tempZipDirectoryName + ".log";
                            List <String> currentFiles   = destination.ListFiles().Select(x => x.Name).ToList();
                            if (!currentFiles.Contains(fileNameConcat))
                            {
                                using (GZipStream gzipStream = new GZipStream(source.OpenFile(gzipFile), CompressionMode.Decompress))
                                {
                                    using (Stream unzippedDestination = destination.CreateFile(fileNameConcat, Repository.Directory.FileCreationMode.ThrowIfFileExists))
                                    {
                                        gzipStream.CopyTo(unzippedDestination);
                                    }
                                }
                            }
                        }

                        source.DeleteDirectory(tempZipDirectoryPath);
                    }
                }

                List <String> modified = new List <String>();
                Int32         newCount = 0;

                using (IDatabaseRepository <IHarvesterDataContext> harvester = RepositoryFactory.CreateHarvesterRepository(_harvesterArgs))
                {
                    logMessage($"Connected to database '{harvester.Name}' ({harvester.ConnectionString})");

                    IEnumerable <DirectoryObjectMetadata> sourceFiles = source.ListFiles("/");
                    Dictionary <String, DirectoryRecord>  dictionary  = harvester.DataContext.DirectoryRecords.Where(d => d.Operation.Name == Name && d.Repository.Name == source.Name).ToDictionary(d => d.FilePath);

                    Entities.Repository repository = harvester.DataContext.Repositories.First(x => x.Name == source.Name);

                    if (OperationID == 0)
                    {
                        logMessage("Warning: OperationID was not set properly. Correcting this.");
                        OperationID = harvester.DataContext.Operations.First(d => d.Name == Name).ID;
                    }

                    foreach (DirectoryObjectMetadata file in sourceFiles)
                    {
                        DirectoryRecord element;

                        if (!dictionary.TryGetValue(file.Path, out element))
                        {
                            // Unknown path: schedule the file for processing and queue a new record.
                            modified.Add(file.Name);
                            newCount++;

                            harvester.DataContext.DirectoryRecords.InsertOnSubmit(new DirectoryRecord
                            {
                                OperationID      = OperationID,
                                RepositoryID     = repository.ID,
                                FilePath         = file.Path,
                                FileModifiedDate = file.ModifiedDate,
                                CreationDate     = DateTime.Now,
                                ModifiedDate     = DateTime.Now
                            });
                        }
                        else if (file.ModifiedDate > element.FileModifiedDate)
                        {
                            // Known path whose file is newer than the stored record.
                            modified.Add(file.Name);
                            element.FileModifiedDate = file.ModifiedDate;
                            element.ModifiedDate     = DateTime.Now;
                        }
                    }

                    // The enclosing using blocks dispose harvester and source while the exception
                    // unwinds, so no manual Dispose calls are needed (they caused a double dispose).
                    cancellationToken.ThrowIfCancellationRequested();

                    // NOTE(review): records are committed before the import below runs, so a failed
                    // import presumably leaves these files marked as processed - confirm this is intended.
                    harvester.DataContext.SubmitChanges();
                }

                logMessage($"Discovered {modified.Count} files to be processed ({newCount} new and {modified.Count - newCount} updated).");

                if (modified.Count == 0)
                {
                    return;
                }

                using (IDatabaseRepository <IStatisticsDataContext> destination = RepositoryFactory.CreateStatisticsRepository(_statisticsArgs))
                {
                    logMessage($"Connected to database '{destination.Name}' ({destination.ConnectionString})");

                    StreamParser <EZProxyAudit> Parser = new StreamParser <EZProxyAudit>();

                    List <EZProxyAudit> records = modified.Select(file =>
                    {
                        logMessage($"Processing '{file}':");

                        int lineNumber = 0;

                        // Materialise each file's records inside the using block so the stream can be
                        // disposed as soon as the file is parsed; previously it was never disposed.
                        using (Stream input = source.OpenFile(file))
                        {
                            return(Parser.ParseStream(input).Select(x => new EZProxyAudit
                            {
                                DateTime = x.DateTime,
                                Event = x.Event,
                                IP = x.IP,
                                Other = x.Other,
                                Session = x.Session,
                                Username = x.Username,
                                LineNumber = lineNumber++,
                            }).ToList());
                        }
                    }).SelectMany(x => x).ToList();

                    logMessage($"Records Found: {records.Count}");

                    destination.DataContext.BulkImportEZProxyAudit(records.ToDataReader(r => new object[] { r.DateTime, r.Event, r.IP, r.Username, r.Session, r.Other, r.LineNumber }));
                }
            }
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Extracts zipped WMS inventory exports in the source directory, determines which export
        /// files are new or changed relative to the harvester records, parses each of them with the
        /// parser matching its header, bulk-imports the rows into the statistics database, and then
        /// records the successfully imported files in the harvester database.
        /// </summary>
        /// <param name="runDate">Run date supplied by the caller; not read by this method (each file's date comes from <c>GetFileDate</c>).</param>
        /// <param name="logMessage">Callback that receives progress messages.</param>
        /// <param name="cancellationToken">Checked before the import starts and again before each file is imported.</param>
        /// <exception cref="OperationCanceledException">Cancellation was requested before the import started.</exception>
        /// <exception cref="InvalidDataException">A file's header matched none of the known formats.</exception>
        /// <exception cref="Exception">At least one file was cancelled, produced no usable rows, or failed to import.</exception>
        public override void Execute(DateTime runDate, Action <string> logMessage, CancellationToken cancellationToken)
        {
            using (IDirectoryRepository source = RepositoryFactory.CreateDirectoryRepository(_directoryArgs))
            {
                logMessage($"Connected to source repository '{source.Name}' ({source.ConnectionString})");

                Regex filePattern = new Regex(@"^((ITU[.]Item_inventories[.]\d{8})|(ITU[.]Circulation_Item_Inventories[.]\d{8}))[.]txt([.]zip)?$", RegexOptions.IgnoreCase);

                var sourceFilesBeforeZip = source.ListFiles().Where(y => filePattern.IsMatch(y.Name)).ToArray();
                foreach (DirectoryObjectMetadata zipFile in sourceFilesBeforeZip.Where(x => x.Path.Contains(".txt.zip")))
                {
                    // Only extract when the matching .txt file is not already in the listing.
                    if (sourceFilesBeforeZip.All(x => x.Name != zipFile.Name.Replace(".zip", "")))
                    {
                        ZipFile.ExtractToDirectory(zipFile.Path, Path.GetDirectoryName(zipFile.Path.Replace(".txt.zip", "")));
                    }
                    //source.DeleteFile(zipFile.Path);
                }

                List <String> modified = new List <String>();
                Int32         newCount = 0;

                List <DirectoryObjectMetadata> sourceFiles = source.ListFiles("/").Where(y => filePattern.IsMatch(y.Name) && !y.Name.EndsWith(".zip")).ToList();

                using (IDatabaseRepository <IHarvesterDataContext> harvester = RepositoryFactory.CreateHarvesterRepository(_harvesterArgs))
                {
                    logMessage($"Connected to database '{harvester.Name}' ({harvester.ConnectionString})");

                    Entities.Repository repository = harvester.DataContext.Repositories.First(y => y.Name == source.Name);

                    if (OperationID == 0)
                    {
                        logMessage("Warning: OperationID was not set properly. Correcting this.");
                        OperationID = harvester.DataContext.Operations.First(d => d.Name == Name).ID;
                    }

                    Dictionary <String, DirectoryRecord> dictionary = harvester.DataContext.DirectoryRecords.Where(d => d.OperationID == OperationID && d.RepositoryID == repository.ID).ToDictionary(d => d.FilePath);

                    // Nothing is written here; the harvester records are only updated after the
                    // import, through UpdateHarvesterRecord.
                    foreach (DirectoryObjectMetadata file in sourceFiles)
                    {
                        DirectoryRecord element;

                        if (!dictionary.TryGetValue(file.Path, out element))
                        {
                            modified.Add(file.Name);
                            newCount++;
                        }
                        else if (file.ModifiedDate > element.FileModifiedDate)
                        {
                            modified.Add(file.Name);
                        }
                    }
                }

                // Every new file is also in 'modified', so an empty list means there is no work at all.
                if (modified.Count == 0)
                {
                    logMessage("No Records to be processed.");
                    return;
                }

                logMessage($"Discovered {modified.Count} files to be processed ({newCount} new and {modified.Count - newCount} updated).");

                // The enclosing using block disposes 'source' while the exception unwinds.
                cancellationToken.ThrowIfCancellationRequested();

                using (IDatabaseRepository <IStatisticsDataContext> destination = RepositoryFactory.CreateStatisticsRepository(_databaseArgs))
                {
                    logMessage($"Connected to destination database '{destination.Name}' ({destination.ConnectionString})");
                    bool exceptionHandled = false;

                    StreamParser <WmsInventoryRecord>     Parser     = new StreamParser <WmsInventoryRecord>();
                    StreamParser <WmsInventoryRecordEdge> ParserEdge = new StreamParser <WmsInventoryRecordEdge>();
                    StreamParser <WmsInventoryRecordDiff> ParserDiff = new StreamParser <WmsInventoryRecordDiff>();
                    StreamParser <WmsInventoryRecord2018> Parser2018 = new StreamParser <WmsInventoryRecord2018>();

                    cancellationToken.ThrowIfCancellationRequested();

                    // Lazy pipeline: each file is opened, parsed and imported only when the final
                    // ToList below pulls it, so files are processed one at a time and in order.
                    var completedFiles = modified.Select <string, (string file, DateTime RunDate, IEnumerable <IWmsInventoryRecord> records)>(file =>
                    {
                        logMessage($"Processing '{file}':");

                        Stream inputStream   = source.OpenFile(file);
                        DateTime fileRunDate = GetFileDate(file);
                        string headers       = ExtractHeader(inputStream);

                        //logMessage(headers);

                        if (IsWmsInventoryRecord(headers))
                        {
                            return(file, fileRunDate, Parser.ParseStream(inputStream));
                        }

                        if (IsWmsInventoryRecord2018(headers))
                        {
                            return(file, fileRunDate, Parser2018.ParseStream(inputStream));
                        }

                        if (IsWmsInventoryRecordDiff(headers))
                        {
                            return(file, fileRunDate, ParserDiff.ParseStream(inputStream));
                        }

                        if (IsWmsInventoryRecordEdge(headers))
                        {
                            return(file, fileRunDate, ParserEdge.ParseStream(inputStream));
                        }

                        throw new InvalidDataException($"Header format not recognized: '{headers}'");
                    }).Select(wmsFileRecord =>
                    {
                        // Keep rows that have a title or a barcode, and keep only the first row for
                        // each (OclcNumber, Barcode, RunDate) combination.
                        var parsed = wmsFileRecord.records.Select(wms => new InventoryRecord
                        {
                            OclcNumber          = wms.OclcNumber,
                            Title               = ParseTitle(wms.Title),
                            Author              = wms.Author,
                            MaterialFormat      = wms.MaterialFormat.ToString(),
                            Barcode             = wms.Barcode,
                            Cost                = wms.Cost,
                            LastInventoriedDate = wms.LastInventoriedDate,
                            DeletedDate         = wms.ItemDeletedDate,
                            ItemType            = wms.ItemType.ToString(),
                            CallNumber          = wms.CallNumber,
                            ShelvingLocation    = wms.ShelvingLocation?.ToString(),
                            CurrentStatus       = wms.CurrentStatus?.ToString(),
                            Description         = wms.Description,
                            RunDate             = wmsFileRecord.RunDate,
                            Anomalous           = AnomalousBarcode(wms.Barcode),
                        }).Where(y => y.Title != null || y.Barcode != null).GroupBy(x => new { x.OclcNumber, x.Barcode, x.RunDate }).Select(x => x.First()).ToArray();

                        logMessage($"Records Found: {parsed.Length}");

                        if (cancellationToken.IsCancellationRequested)
                        {
                            logMessage("Operation was cancelled");
                            exceptionHandled = true;
                            return(null);
                        }

                        if (parsed.Length <= 0)
                        {
                            logMessage("Failed to parse properly and return any meaningful records. This might mean that non of the parsed records had a Title or Barcode.");
                            exceptionHandled = true;
                            return(null);
                        }

                        try
                        {
                            destination.DataContext.BulkImportInventory(
                                parsed.ToDataReader(r => new object[] { r.OclcNumber, r.Title, r.MaterialFormat, r.Author, r.Barcode, r.Cost, r.LastInventoriedDate, r.DeletedDate, r.ItemType, r.CallNumber, r.ShelvingLocation, r.CurrentStatus, r.Description, r.RunDate, r.Anomalous }));
                        }
                        catch (SqlException ex)
                        {
                            logMessage(ex.Message);
                            if (ex.InnerException != null)
                            {
                                logMessage(ex.InnerException.ToString());
                            }
                            logMessage("Sql Server was most likely put into an unusable state after this exception and thus the whole operation was canceled.");
                            exceptionHandled = true;

                            // The import failed, so this file must not be reported as successful;
                            // otherwise it would be recorded as processed and not picked up again.
                            return(null);
                        }

                        return(sourceFiles.First(x => x.Name == wmsFileRecord.file));
                    }).Where(x => x != null);

                    // Enumerate the pipeline exactly once; this is where the files are actually imported.
                    List <DirectoryObjectMetadata> successfulFiles = completedFiles.ToList();

                    UpdateHarvesterRecord(logMessage, successfulFiles, source.Name, _harvesterArgs);

                    if (exceptionHandled)
                    {
                        destination.DataContext.Connection.Close();
                        destination.DataContext.Dispose();
                        throw new Exception("An Exception was encountered. At least one file failed");
                    }
                }
            }
        }
        /// <inheritdoc />
        /// <remarks>
        /// Requests the records of every available report for <paramref name="runDate"/> from the
        /// COUNTER source, optionally writes each report as JSON to a local directory, makes sure a
        /// vendor record exists per run date and that a database record exists, de-duplicates the
        /// result, bulk-imports it per run date into the statistics database, and stamps a
        /// CounterOperationRecord for each report in the harvester database.
        /// </remarks>
        public override void Execute(DateTime runDate, Action <string> logMessage, CancellationToken cancellationToken)
        {
            using (IDatabaseRepository <IStatisticsDataContext> destination = RepositoryFactory.CreateStatisticsRepository(statisticsArguments))
            {
                destination.DataContext.LogMessage = logMessage;

                logMessage($"Connected to destination database '{destination.Name}' ({destination.ConnectionString})");

                using (ICounterRepository source = RepositoryFactory.CreateCounterRepository(counterRepoArguments))
                {
                    source.LogMessage = logMessage;
                    logMessage($"Connected to source repository '{source.Name}' ({source.ConnectionString})");

                    Dictionary <CounterReport, List <CounterRecord> > preRecordsReports = source.AvailableReports.ToDictionary(report => report, report => source.RequestRecords(runDate, report).ToList());

                    if (!preRecordsReports.Any(x => x.Value.Count > 0))
                    {
                        logMessage("No Records for this date");
                        return;
                    }

                    // Optional local copy of the raw report data, one JSON file per report.
                    if (localJsonArguments != null)
                    {
                        foreach (var reportRecord in preRecordsReports)
                        {
                            using (IDirectoryRepository directoryRepo = RepositoryFactory.CreateDirectoryRepository(localJsonArguments))
                            {
                                using (Stream stream = directoryRepo.CreateFile($"{source.Name} {runDate:yyyy-MM-dd} {reportRecord.Key}.json", FileCreationMode.Overwrite))
                                {
                                    JsonSerializer serializer = JsonSerializer.Create();
                                    using (JsonTextWriter jsonTextWriter = new JsonTextWriter(new StreamWriter(stream)))
                                    {
                                        serializer.Serialize(jsonTextWriter, reportRecord.Value);
                                    }
                                }
                            }
                        }
                    }

                    var preRecords = preRecordsReports.SelectMany(x => x.Value).ToList();

                    //Add Metrics of vendorRecords to single vendor record
                    // The groups are materialised up front because the loop body appends to preRecords.
                    foreach (var recordsByRunDate in preRecords.GroupBy(x => x.RunDate).ToList())
                    {
                        if (recordsByRunDate.Any(x => x.ItemType == ItemType.Vendor))
                        {
                            foreach (var vendorRecord in recordsByRunDate.Where(x => x.ItemType == ItemType.Vendor))
                            {
                                vendorRecord.Identifiers = new[]
                                {
                                    new CounterIdentifier { IdentifierType = IdentifierType.Proprietary, IdentifierValue = source.Name }
                                };
                            }
                        }
                        else
                        {
                            // No vendor record for this run date: add one with no metrics.
                            preRecords.Add(new CounterRecord
                            {
                                ItemName    = source.Name,
                                ItemType    = ItemType.Vendor,
                                RunDate     = recordsByRunDate.Key,
                                Identifiers = new[]
                                {
                                    new CounterIdentifier { IdentifierType = IdentifierType.Proprietary, IdentifierValue = source.Name }
                                },
                                Metrics     = new CounterMetric[] { }
                            });
                        }
                    }

                    if (preRecords.Any(x => x.ItemType == ItemType.Database))
                    {
                        foreach (CounterRecord record in preRecords)
                        {
                            if (record.ItemType != ItemType.Database)
                            {
                                continue;
                            }

                            record.ItemPlatform = source.Name;
                            record.Identifiers  = new[]
                            {
                                new CounterIdentifier { IdentifierType = IdentifierType.Database, IdentifierValue = record.ItemName }
                            };
                        }
                    }
                    else
                    {
                        // No database record at all: add one named after the source, with no metrics.
                        preRecords.Add(new CounterRecord
                        {
                            ItemName     = source.Name,
                            ItemType     = ItemType.Database,
                            ItemPlatform = source.Name,
                            RunDate      = runDate,
                            Identifiers  = new[]
                            {
                                new CounterIdentifier { IdentifierType = IdentifierType.Database, IdentifierValue = source.Name }
                            },
                            Metrics      = new CounterMetric[] { }
                        });
                    }

                    // Drop unnamed records, merge duplicates, and give every remaining record an
                    // index that ties its identifiers and metrics to it during the bulk import.
                    var records = preRecords
                                  .Where(x => !string.IsNullOrEmpty(x.ItemName))
                                  .Select(SanitizeIdentifiers)
                                  .GroupBy(x => x, new CounterRecordComparer())
                                  .Select(AggregateDuplicates)
                                  .Select((r, i) => new { Index = i, Record = r });

                    var counterRecords = records.GroupBy(x => x.Record.RunDate).ToList();

                    // NOTE(review): the "+ 1" is kept from the original; it looks like an off-by-one
                    // now that the added vendor/database records are already in preRecords - confirm.
                    logMessage($"{preRecords.Count + 1} Total Records");
                    logMessage($"{recordswithNoIdentifiers} Records with no Identifiers");
                    logMessage($"{counterRecords.Sum(x => x.Count())} Unique Records");

                    if (cancellationToken.IsCancellationRequested)
                    {
                        // The enclosing using blocks dispose 'source' and 'destination' while the
                        // exception unwinds, so only the data-context cleanup is done by hand.
                        destination.DataContext.Connection.Close();
                        destination.DataContext.Dispose();
                        cancellationToken.ThrowIfCancellationRequested();
                    }

                    foreach (var counterGroup in counterRecords)
                    {
                        destination.DataContext.BulkImportCounterTransactions(
                            counterGroup
                            .ToDataReader(r => new object[] { null, null, null, r.Index, r.Record.ItemName, r.Record.ItemPlatform, r.Record.ItemType, r.Record.RunDate }),
                            counterGroup
                            .SelectMany(r => r.Record.Identifiers.Bind(a => a.Select(i => new { r.Index, Identifier = i, r.Record.ItemType })))
                            .ToDataReader(i => new object[] { i.Index, i.Identifier.IdentifierType, i.Identifier.IdentifierValue, i.ItemType }),
                            counterGroup
                            .SelectMany(r => r.Record.Metrics.Bind(a => a.Select(m => new { r.Index, Metric = m })))
                            .ToDataReader(m => new object[] { m.Index, m.Metric.MetricType, m.Metric.MetricValue }));

                        //Add Record of report being run
                        using (var harvester = RepositoryFactory.CreateHarvesterRepository(harvesterArguments))
                        {
                            if (OperationID == 0)
                            {
                                logMessage("Warning: OperationID was not set properly. Correcting this.");
                                OperationID = harvester.DataContext.Operations.First(d => d.Name == Name).ID;
                            }

                            Entities.Repository repository = harvester.DataContext.Repositories.First(x => x.Name == source.Name);

                            foreach (CounterReport report in preRecordsReports.Keys)
                            {
                                string reportName = report.ToString();

                                // One lookup instead of Any followed by First with the same predicate.
                                var existingRecord = harvester.DataContext.CounterOperationRecords.FirstOrDefault(x => x.OperationID == OperationID && x.RunDate == runDate && x.Report == reportName);

                                if (existingRecord == null)
                                {
                                    harvester.DataContext.CounterOperationRecords.InsertOnSubmit(new CounterOperationRecord
                                    {
                                        OperationID  = OperationID,
                                        RepositoryID = repository.ID,
                                        RunDate      = runDate,
                                        Report       = reportName,
                                        ExecutedDate = DateTime.Now
                                    });
                                }
                                else
                                {
                                    existingRecord.ExecutedDate = DateTime.Now;
                                }
                            }

                            harvester.DataContext.SubmitChanges();
                        }
                    }
                }
            }
        }
        public override void Execute(DateTime runDate, Action <string> logMessage, CancellationToken cancellationToken)
        {
            using (IDirectoryRepository source = RepositoryFactory.CreateDirectoryRepository(_directoryArgs))
            {
                logMessage($"Connected to source repository '{source.Name}' ({source.ConnectionString})");

                List <string> modified = new List <string>();
                int           newCount = 0;

                using (IDatabaseRepository <IHarvesterDataContext> harvester = RepositoryFactory.CreateHarvesterRepository(_harvesterArgs))
                {
                    logMessage($"Connected to database '{harvester.Name}' ({harvester.ConnectionString})");

                    Regex filePattern = new Regex(_arguments.FilePattern, RegexOptions.IgnoreCase);
                    IEnumerable <DirectoryObjectMetadata> sourceFiles = source.ListFiles().Where(x => filePattern.IsMatch(x.Name)).ToArray();
                    Dictionary <string, DirectoryRecord>  dictionary  = harvester.DataContext.DirectoryRecords.Where(d => d.Operation.Name == Name && d.Repository.Name == source.Name).ToDictionary(d => d.FilePath);

                    Entities.Repository repository = harvester.DataContext.Repositories.First(x => x.Name == source.Name);

                    if (OperationID == 0)
                    {
                        logMessage("Warning: OperationID was not set properly. Correcting this.");
                        OperationID = harvester.DataContext.Operations.First(d => d.Name == Name).ID;
                    }

                    foreach (DirectoryObjectMetadata file in sourceFiles)
                    {
                        if (!dictionary.ContainsKey(file.Path))
                        {
                            modified.Add(file.Path);
                            newCount++;
                        }
                        else
                        {
                            DirectoryRecord element = dictionary[file.Path];

                            if (file.ModifiedDate > element.FileModifiedDate)
                            {
                                modified.Add(file.Path);
                            }
                        }
                    }

                    if (cancellationToken.IsCancellationRequested)
                    {
                        source.Dispose();
                        harvester.Dispose();
                        cancellationToken.ThrowIfCancellationRequested();
                    }


                    logMessage($"Discovered {modified.Count} files to be processed ({newCount} new and {modified.Count - newCount} updated).");

                    if (modified.Count == 0)
                    {
                        return;
                    }

                    List <StatistaRecord> records = new List <StatistaRecord>();

                    foreach (string file in modified)
                    {
                        logMessage($"Processing Statista file: {file}");

                        ExcelPackage   package   = new ExcelPackage(new FileInfo(file));
                        ExcelWorksheet worksheet = package.Workbook.Worksheets[1];
                        ExcelRange     cells     = worksheet.Cells;

                        List <string> Fields = new List <string>();

                        for (int k = 1; k <= worksheet.Dimension.Columns; k++)
                        {
                            Fields.Add(cells[1, k].Value.ToString());
                        }

                        Dictionary <string, int> fieldMap = Fields.Select((x, i) => new { index = i, value = x }).ToDictionary(x => x.value.ToLower(), x => x.index);

                        List <List <string> > lines = new List <List <string> >();
                        for (int i = 2; i <= worksheet.Dimension.Rows; i++)
                        {
                            List <string> line = new List <string>();
                            for (int j = 1; j <= worksheet.Dimension.Columns; j++)
                            {
                                line.Add(cells[i, j].Text);
                            }

                            lines.Add(line);
                        }

                        int   recordsSkipped   = 0;
                        int[] necessaryIndices = new[] { fieldMap["date"], fieldMap["title"], fieldMap["type of access"] };
                        foreach (List <string> line in lines)
                        {
                            if (line.All(EmptyField) || line.Where((x, i) => necessaryIndices.Contains(i)).Any(EmptyField))
                            {
                                recordsSkipped++;
                                continue;
                            }

                            for (int i = 0; i < line.Count; i++)
                            {
                                if (EmptyField(line[i]))
                                {
                                    line[i] = null;
                                }
                            }

                            records.Add(new StatistaRecord
                            {
                                Date         = ParseDate(line[fieldMap["date"]]),
                                Title        = line[fieldMap["title"]],
                                TypeofAccess = line[fieldMap["type of access"]],

                                ID           = fieldMap.ContainsKey("id") ? line[fieldMap["id"]] : null,
                                ContentType  = fieldMap.ContainsKey("content type") ? line[fieldMap["content type"]] : null,
                                MainIndustry = fieldMap.ContainsKey("main industry") ? line[fieldMap["main industry"]] : null,
                                Content      = fieldMap.ContainsKey("content") ? line[fieldMap["content"]] : null,
                                Subtype      = fieldMap.ContainsKey("subtyp") ? line[fieldMap["subtyp"]] : null,
                            });
                        }

                        logMessage($"\t{records.Count}/{records.Count + recordsSkipped} records processed.");
                    }

                    using (IDatabaseRepository <IStatisticsDataContext> destination = RepositoryFactory.CreateStatisticsRepository(_statisticsArgs))
                    {
                        destination.DataContext.BulkImportStatista(
                            records.ToDataReader(r => new object[] { r.ID, r.Date, r.ContentType, r.MainIndustry, r.Title, r.TypeofAccess, r.Content, r.Subtype }));
                    }

                    UpdateHarvesterRecord(logMessage, sourceFiles, source.Name, _harvesterArgs);
                }
            }
        }
Ejemplo n.º 6
0
 /// <summary>
 /// Adds the supplied repository entity by forwarding it to the underlying context.
 /// </summary>
 /// <param name="repository">The repository entity handed to the context's <c>Add</c> call.</param>
 public void Add(Entities.Repository repository) => _context.Add(repository);