/// <summary>
/// Records <paramref name="e"/> on the job's current state, switches the job to the Error state
/// and saves it. If any of those steps throws, the failure is logged and a newly built Paused
/// state describing both exceptions is saved instead.
/// </summary>
/// <param name="config">Job configuration; its JobName is used for logging and for the fallback state.</param>
/// <param name="e">The exception that made the job fail.</param>
private void SetErrorState(SqlDatabaseConnectorJobConfiguration config, Exception e)
{
    try
    {
        var currentState = GetCurrentJobState(config);
        currentState.SetErrorState(e);

        // Keep track of the state the job was in before failing, unless it was already Error.
        var alreadyInError = currentState.State == JobState.JobState.Error;
        if (!alreadyInError)
        {
            currentState.LastWorkingState = currentState.State;
        }

        currentState.State = JobState.JobState.Error;
        _stateService.SaveState(currentState);
    }
    catch (Exception saveFailure)
    {
        Log.Error(saveFailure, "Error when saving error state: {message} for job {jobName}", saveFailure.Message, config.JobName);

        // Build a replacement state from scratch since the regular one could not be stored.
        var fallbackState = new DatabaseJobState();
        fallbackState.Name = config.JobName;
        fallbackState.State = JobState.JobState.Paused;
        fallbackState.LastExecutionDate = DateTime.UtcNow;
        fallbackState.ErrorCount = 1;
        fallbackState.LastWorkingState = JobState.JobState.Paused;
        fallbackState.RecentErrorDate = DateTime.UtcNow;
        fallbackState.Message = $"Job threw an exception, {e.Message}, and then threw an exception when trying to save the error state: {saveFailure.Message}";

        _stateService.SaveState(fallbackState);
    }
}
/// <summary>
/// Creates and saves a new state for the job in <paramref name="config"/>, storing the last
/// change version of every change table and event table as version thresholds, and then
/// repeats the procedure for each foreign source using the ForeignTable state.
/// </summary>
/// <param name="config">Configuration of the job whose state is created.</param>
/// <param name="jobState">The state value assigned to the new state.</param>
/// <param name="description">Optional text stored in a "Description" field; also used as prefix for nested descriptions.</param>
private void SetUpJobStateRecursively(SqlDatabaseConnectorJobConfiguration config, JobState.JobState jobState, string description = "")
{
    var hasDescription = !string.IsNullOrEmpty(description);

    var newState = new DatabaseJobState();
    newState.Name = config.JobName;
    newState.State = jobState;
    newState.LastExecutionDate = DateTime.UtcNow;

    if (hasDescription)
    {
        newState.Fields.Add(new Field("Description", description));
    }

    foreach (var table in config.ChangeTables)
    {
        var lastVersion = GetIntegration(config).GetLastChangeVersion(config, table);
        newState.SetChangetableVersionThreshold(table.TableName, lastVersion);
    }

    foreach (var table in config.EventTables)
    {
        var lastVersion = GetIntegration(config).GetLastChangeVersion(config, table);
        newState.SetChangetableVersionThreshold(table.TableName, lastVersion);
    }

    _stateService.SaveState(newState);

    // Nested descriptions are chained as "<parent> -> ..."; an empty parent contributes nothing.
    var prefix = hasDescription ? description + " -> " : "";
    foreach (var foreignSource in config.ForeignSources)
    {
        var nestedDescription = $"{prefix} Relating to {config.MainTable.TableName}.{foreignSource.Relation.Key} with {foreignSource.Relation.Value}";
        SetUpJobStateRecursively(foreignSource.SqlDatabaseConnectorConfiguration, JobState.JobState.ForeignTable, nestedDescription);
    }
}
/// <summary>
/// Verifies that a custom field set on one state is present on a state constructed from it.
/// </summary>
public void CustomFieldsArePersistedWhenCopyingToANewState()
{
    string customName = "capella";
    string customValue = "first";

    var original = new DatabaseJobState
    {
        Name = "Test1",
        State = DatabaseJobState.JobState.Discovering
    };
    original.SetValue(customName, customValue);

    // The value is readable both through GetValue and through the Fields collection.
    original.GetValue(customName).Should().Be(customValue);
    original.Fields.Should().Contain(field => field.Name == customName && field.Value == customValue);

    var copy = new DatabaseJobState(original);

    copy.Fields.Should().AllBeEquivalentTo(original.Fields);
}
/// <summary>
/// Builds a progress DTO from a job state: Total is the state's LastDiscoverBatchCount and
/// Current is its BatchCount, both read through a <see cref="DatabaseJobState"/> wrapper.
/// </summary>
/// <param name="state">The job state to read progress from.</param>
/// <returns>A new <see cref="JobProgressDto"/>.</returns>
public static JobProgressDto GetProgressDto(this JobState state)
{
    // TODO: smarter logic
    var databaseState = new DatabaseJobState(state);

    var progress = new JobProgressDto();
    progress.Total = databaseState.LastDiscoverBatchCount;
    progress.Current = databaseState.BatchCount;
    return progress;
}
/// <summary>
/// Runs an incremental discover: finds the ids changed with event type Add, writes them as
/// work tasks, then finds the ids changed with event type Delete and returns both as
/// <see cref="SourceChanges"/>. Does nothing when the stored state is already Discovering.
/// </summary>
/// <param name="config">Configuration of the job being crawled.</param>
/// <param name="jobStateBase">The state of the job; it is saved as Discovering while ids are being fetched.</param>
/// <returns>
/// The adds (only populated when all added ids fit in a single batch) and the delete id fields.
/// </returns>
private SourceChanges FetchDocumentsUsingIncremental(SqlDatabaseConnectorJobConfiguration config, DatabaseJobState jobStateBase)
{
    // Not fully thread safe, but good enough: two concurrent discovers still work fine.
    var persistedNow = new DatabaseJobState(_stateService.LoadState(config.JobName));
    var someoneElseIsDiscovering = persistedNow.State == JobState.JobState.Discovering;
    if (someoneElseIsDiscovering)
    {
        Log.Logger.Information($"{config.JobName}: State in DISCOVER phase. Another job is already at it. Doing nothing.");
        return new SourceChanges();
    }

    // Flag the job as discovering before the (potentially slow) id lookup starts.
    jobStateBase.Status = JobStatus.Ok;
    jobStateBase.State = JobState.JobState.Discovering;
    _stateService.SaveState(jobStateBase);

    Log.Logger.Information($"{config.JobName}: Fetching incremental documents");
    var addedIds = FetchChangedIdsRecursively(config, EventType.Add, false, jobStateBase).ToList();

    IList<AddDocument> adds = null;
    if (addedIds.Count > 0)
    {
        var batchCount = _workTaskService.WriteTasks(config.JobName, config.BatchSize, addedIds);
        jobStateBase.BatchCount = batchCount;
        jobStateBase.LastDiscoverDate = DateTime.UtcNow;
        jobStateBase.LastDiscoverBatchCount = batchCount;

        // Everything fits in one batch, so that batch is processed right away.
        if (addedIds.Count <= config.BatchSize)
        {
            var firstTask = _workTaskService.GetNextTask(config.JobName);
            jobStateBase.BatchCount--;
            adds = FetchDocumentsUsingWorkTask(config, firstTask).Adds;
        }
    }

    // NOTE(review): the state is reloaded here and this method does not save the in-memory
    // BatchCount / LastDiscoverDate / LastDiscoverBatchCount values set above before doing so.
    // Confirm whether they are persisted elsewhere.
    var reloaded = _stateService.LoadState(config.JobName);
    jobStateBase = new DatabaseJobState(reloaded);

    var deletedIdFields = FetchChangedIdsRecursively(config, EventType.Delete, true, jobStateBase)
        .Select(id => new Field(config.MainTable.PrimaryKeyName, id))
        .ToList();

    jobStateBase.State = JobState.JobState.IncrementalCrawling;
    _stateService.SaveState(jobStateBase);

    if (adds == null)
    {
        adds = new List<AddDocument>();
    }

    return new SourceChanges(adds, deletedIdFields);
}
/// <summary>
/// Resets the connector when a reset interval is configured and at least that many hours
/// have passed since the state's InitDate.
/// </summary>
/// <param name="config">Job configuration; a ResetEveryXHour of zero or less disables the reset.</param>
/// <param name="jobStateBase">The state whose InitDate is compared against the current UTC time.</param>
/// <returns>true when <see cref="ResetConnector"/> was called; otherwise false.</returns>
private bool ResetIfConfigured(SqlDatabaseConnectorJobConfiguration config, DatabaseJobState jobStateBase)
{
    if (config.ResetEveryXHour <= 0)
    {
        return false;
    }

    // Without an InitDate the elapsed time cannot be computed; reading .Value on an empty
    // nullable would throw InvalidOperationException, so treat it as "no reset due".
    var initDate = jobStateBase.InitDate;
    if (!initDate.HasValue)
    {
        return false;
    }

    var hoursSinceInit = (DateTime.UtcNow - initDate.Value).TotalHours;
    if (hoursSinceInit < config.ResetEveryXHour)
    {
        return false;
    }

    ResetConnector(config.JobName);
    return true;
}
/// <summary>
/// Handles a job whose state is Discovering. Either another worker is currently discovering
/// changes in the source, or a previous worker crashed unexpectedly. To recover from the
/// latter, the state's DiscoverCount is incremented on every call until it reaches
/// ErrorCountLimit, at which point the state is set back to IncrementalCrawling and the
/// counter is cleared.
/// </summary>
/// <param name="jobStateBase">The state to update and save.</param>
/// <returns>Always an empty <see cref="SourceChanges"/>.</returns>
private SourceChanges HandleDiscoveringState(DatabaseJobState jobStateBase)
{
    var executionsLeft = ErrorCountLimit - jobStateBase.DiscoverCount;

    if (executionsLeft <= 0)
    {
        Log.Logger.Warning($"{jobStateBase.Name}: State in DISCOVER phase, will reset to IncrementalCrawling");
        jobStateBase.State = JobState.JobState.IncrementalCrawling;
        jobStateBase.DiscoverCount = 0;
    }
    else
    {
        // The message reports the counter as it was before this execution is counted.
        Log.Logger.Information($"{jobStateBase.Name}: State in DISCOVER phase (for {jobStateBase.DiscoverCount} job executions. Will reset to IncrementalCrawl in {executionsLeft} executions");
        jobStateBase.DiscoverCount++;
    }

    jobStateBase.Status = JobStatus.Ok;
    _stateService.SaveState(jobStateBase);

    return new SourceChanges();
}
/// <summary>
/// Resets a job: its stored state is saved as Paused, all of its work tasks are deleted and
/// finally the state itself is deleted. Nothing is changed when no state is stored.
/// </summary>
/// <param name="jobName">Name of the job to reset.</param>
public void ResetConnector(string jobName)
{
    Log.Information($"Resetting state for job {jobName}");

    var storedState = _stateService.LoadState(jobName);
    if (storedState == null)
    {
        return;
    }

    // Save as Paused first, then remove the tasks and the state.
    var pausedState = new DatabaseJobState(storedState);
    pausedState.State = JobState.JobState.Paused;
    _stateService.SaveState(pausedState);

    var stateName = pausedState.Name;
    _workTaskService.DeleteAllTasks(stateName);
    _stateService.DeleteState(stateName);
}
/// <summary>
/// Verifies that a value stored with SetValue can be read back with GetValue and shows up
/// in the state's Fields collection.
/// </summary>
public void CanSetFieldsThatAreNotAProperty()
{
    string customName = "capella";
    string customValue = "first";

    var jobState = new DatabaseJobState
    {
        Name = "Test1",
        State = DatabaseJobState.JobState.Discovering
    };
    jobState.SetValue(customName, customValue);

    jobState.GetValue(customName).Should().Be(customValue);
    jobState.Fields.Should().Contain(field => field.Name == customName && field.Value == customValue);
}
/// <summary>
/// Verifies that a fully populated state exposes 14 fields, that a second state built from
/// those fields equals the first, and that the job error and custom fields can be read back.
/// </summary>
public void CanSerializeReflectionState()
{
    var source = new DatabaseJobState
    {
        Name = "Test1",
        State = DatabaseJobState.JobState.InitialCrawling,
        BatchCount = 2,
        DiscoverCount = 4,
        ErrorCount = 11,
        LastWorkingState = DatabaseJobState.JobState.Discovering,
        Message = "Hej",
        RecentErrorMessage = "Hej hej heje hej",
        RecentErrorStackTrace = "alskdjaslk jalskjd alksjd alsjd lkasjdlka jsdlkjaslkdj alskjd lakjsd lakjsd kalsjdaslkdj aslkjdas lkjasdlkjas dlkjas dlkasdjas dlkj",
        RecentErrorDate = new DateTime(2000, 1, 1),
        IsActive = true
    };
    source.SetValue("custom_field", "");

    // Read the fields once; the same instance is reused to build the second state.
    var serializedFields = source.Fields;
    serializedFields.Should().HaveCount(14);

    var rebuilt = new DatabaseJobState
    {
        Name = source.Name,
        Fields = serializedFields
    };
    source.Should().Be(rebuilt);

    var jobError = source.GetJobError();
    jobError.Should().NotBeNull();
    jobError.Date.Should().Be(source.RecentErrorDate.Value);
    jobError.Message.Should().Be(source.RecentErrorMessage);

    source.GetCustomFields().Count.Should().Be(1);
}
/// <summary>
/// Verifies that a state with an error recorded on it exposes 12 fields and equals a copy
/// constructed from it once the error date has been truncated to whole seconds.
/// </summary>
public void CanSerializeState()
{
    var source = new DatabaseJobState
    {
        Name = "Test1",
        State = DatabaseJobState.JobState.Discovering
    };
    source.State = DatabaseJobState.JobState.InitialCrawling;
    source.BatchCount = 3;
    source.DiscoverCount = 4;
    source.SetErrorState(new Exception("sadsd"));
    source.Message = "hejaasd jlkasdlkjas";

    source.Fields.Should().HaveCount(12);

    // Drop the sub-second part of the error date before comparing with the copy.
    var errorDate = source.RecentErrorDate.Value;
    var subSecondTicks = errorDate.Ticks % TimeSpan.TicksPerSecond;
    source.RecentErrorDate = errorDate.AddTicks(-subSecondTicks);

    var copy = new DatabaseJobState(source);

    source.Should().Be(copy);
}
/// <summary>
/// Dispatches to the handler that matches the job's current state.
/// </summary>
/// <param name="jobStateBase">The state of the job.</param>
/// <param name="configuration">The job configuration passed on to the handlers that need it.</param>
/// <returns>The changes produced by the selected handler; empty for a Paused job.</returns>
/// <exception cref="NotSupportedException">The state has no handler.</exception>
private SourceChanges HandleState(DatabaseJobState jobStateBase, SqlDatabaseConnectorJobConfiguration configuration)
{
    var current = jobStateBase.State;

    if (current == JobState.JobState.Paused)
    {
        return new SourceChanges();
    }
    else if (current == JobState.JobState.InitialCrawling)
    {
        return HandleInitialCrawlingState(jobStateBase, configuration);
    }
    else if (current == JobState.JobState.Discovering)
    {
        return HandleDiscoveringState(jobStateBase);
    }
    else if (current == JobState.JobState.Error)
    {
        return HandleErrorState(jobStateBase, configuration);
    }
    else if (current == JobState.JobState.IncrementalCrawling)
    {
        return HandleIncrementalCrawling(configuration, jobStateBase);
    }

    throw new NotSupportedException($"{jobStateBase.Name}: State is in a non-supported state: {current}.");
}
/// <summary>
/// Performs the initial crawl: saves the state as Discovering, discovers all initial ids,
/// writes them as work tasks, saves the state as IncrementalCrawling and then continues
/// with the incremental crawling handler.
/// </summary>
/// <param name="jobStateBase">The state of the job; updated and saved along the way.</param>
/// <param name="config">Configuration of the job being crawled.</param>
/// <returns>The result of <see cref="HandleIncrementalCrawling"/>.</returns>
private SourceChanges HandleInitialCrawlingState(DatabaseJobState jobStateBase, SqlDatabaseConnectorJobConfiguration config)
{
    jobStateBase.State = JobState.JobState.Discovering;
    jobStateBase.Status = JobStatus.Ok;
    _stateService.SaveState(jobStateBase);

    try
    {
        // Find every id in the source and split them into batches of work.
        var discoveredIds = GetIntegration(config).DiscoverInitialIds(config);
        var batchCount = _workTaskService.WriteTasks(config.JobName, config.BatchSize, discoveredIds.ToList());

        jobStateBase.BatchCount = batchCount;
        jobStateBase.State = JobState.JobState.IncrementalCrawling;
        jobStateBase.LastDiscoverBatchCount = batchCount;
        _stateService.SaveState(jobStateBase);

        return HandleIncrementalCrawling(config, jobStateBase);
    }
    catch (Exception failure)
    {
        // Store the error on the state before letting the exception continue to the caller.
        jobStateBase.SetErrorState(failure);
        _stateService.SaveState(jobStateBase);
        throw;
    }
}
/// <summary>
/// Handles a job in the Error state. When the error count exceeds ErrorCountLimit the job is
/// paused and the count is cleared. Otherwise the job is put back into its last working
/// state and handled again. A last working state of Discovering is replaced by
/// IncrementalCrawling, because there may still be document batches left to process.
/// </summary>
/// <param name="jobStateBase">The state of the job; updated and saved.</param>
/// <param name="configuration">The job configuration, passed on when the state is handled again.</param>
/// <returns>Empty changes when the job is paused; otherwise the result of handling the restored state.</returns>
private SourceChanges HandleErrorState(DatabaseJobState jobStateBase, SqlDatabaseConnectorJobConfiguration configuration)
{
    var limitExceeded = jobStateBase.ErrorCount > ErrorCountLimit;
    if (limitExceeded)
    {
        var pauseNotice = $"Job has been paused due to the Error count limit's been reached: {ErrorCountLimit}. The error count has now been reset to 0.";

        jobStateBase.State = JobState.JobState.Paused;
        jobStateBase.Message = pauseNotice;
        jobStateBase.ErrorCount = 0;
        _stateService.SaveState(jobStateBase);

        Log.Logger.Warning(pauseNotice);
        return new SourceChanges();
    }

    var previousState = jobStateBase.LastWorkingState;
    var retryState = previousState == JobState.JobState.Discovering
        ? JobState.JobState.IncrementalCrawling
        : previousState;

    // The retry number in the message is the count before this attempt is added.
    Log.Logger.Warning($"{jobStateBase.Name}: State is in ERROR mode. Retrying (retry number {jobStateBase.ErrorCount}) by setting it to {retryState}");

    jobStateBase.State = retryState;
    jobStateBase.ErrorCount++;
    _stateService.SaveState(jobStateBase);

    return HandleState(jobStateBase, configuration);
}
/// <summary>
/// Collects the distinct ids that changed for the job in <paramref name="config"/>: the ids
/// reported for the configuration itself plus, for every foreign source with changes, the
/// main table ids looked up through that source's relation key.
/// </summary>
/// <param name="config">Configuration whose changes are discovered.</param>
/// <param name="eventType">The kind of change to look for.</param>
/// <param name="saveState">When true, the state is saved after the ids of this configuration have been discovered.</param>
/// <param name="stateBase">The state handed to the integration (by reference) and to the recursive calls.</param>
/// <returns>A list of distinct changed ids.</returns>
private List<string> FetchChangedIdsRecursively(SqlDatabaseConnectorJobConfiguration config, EventType eventType, bool saveState, DatabaseJobState stateBase)
{
    var collectedIds = GetIntegration(config)
        .DiscoverIncrementalIds(config, ref stateBase, eventType)
        .ToList();

    if (saveState)
    {
        _stateService.SaveState(stateBase);
    }

    foreach (var foreignSource in config.ForeignSources)
    {
        var foreignIds = FetchChangedIdsRecursively(foreignSource.SqlDatabaseConnectorConfiguration, eventType, saveState, stateBase);
        if (foreignIds.Any())
        {
            // Translate the changed foreign rows into ids of this configuration's main table.
            var relatedMainIds = GetIntegration(config).DiscoverMainTableIds(config, foreignSource.Relation.Key, foreignIds.ToList());
            collectedIds.AddRange(relatedMainIds);
        }
    }

    var distinctIds = collectedIds.Distinct();
    return distinctIds.ToList();
}
/// <summary>
/// Discovers the data changes for a job by reading its event tables.
/// </summary>
/// <param name="config">Configuration of the job.</param>
/// <param name="stateBase">The job state handed to the event table reader.</param>
/// <returns>The data changes produced by <see cref="DiscoverDataChangesUsingEventTables"/>.</returns>
public IEnumerable<DataChange> DiscoverChanges(SqlDatabaseConnectorJobConfiguration config, DatabaseJobState stateBase)
    => DiscoverDataChangesUsingEventTables(config, stateBase);
/// <summary>
/// Discovers the ids affected by changes of type <paramref name="changeType"/> using the
/// first registered change discoverer that can handle the configuration.
/// </summary>
/// <param name="config">Configuration of the job.</param>
/// <param name="stateBase">
/// The job state; for every table that reported added or deleted ids its version threshold
/// is updated to the highest version added.
/// </param>
/// <param name="changeType">The kind of change to return ids for.</param>
/// <returns>
/// A lazily evaluated sequence of ids, or an empty list when no discoverer handles the configuration.
/// </returns>
public IEnumerable<string> DiscoverIncrementalIds(SqlDatabaseConnectorJobConfiguration config, ref DatabaseJobState stateBase, EventType changeType)
{
    var matchingDiscoverer = _changeDiscoverers.FirstOrDefault(candidate => candidate.CanHandle(config));
    if (matchingDiscoverer == null)
    {
        Log.Debug("{jobName}: No incremental strategy for {changeType} (no discoverer registered that handles the configuration), no delta crawls will be possible.", config.JobName, changeType);
        return new List<string>();
    }

    var discoveredChanges = matchingDiscoverer.DiscoverChanges(config, stateBase).ToList();

    foreach (var change in discoveredChanges)
    {
        // Tables without any added or deleted ids leave the threshold untouched.
        if (change.AddedIds.Any() || change.DeletedIds.Any())
        {
            stateBase.SetChangetableVersionThreshold(change.Table.TableName, change.HighestVersionAdded, true);
        }
    }

    return discoveredChanges.SelectMany(change => GetDataChangesInMainTable(config, change, changeType));
}
/// <summary>
/// Reads the pending rows of every configured event table and yields one
/// <see cref="DataChange"/> per table.
/// </summary>
/// <remarks>
/// For each table the rows whose event sequence column is greater than the threshold stored
/// in <paramref name="stateBase"/> are selected, ordered ascending by that column. This is an
/// iterator, so a table is only queried when the caller enumerates up to it.
/// </remarks>
/// <param name="config">Job configuration providing the connection string and the event tables.</param>
/// <param name="stateBase">The job state providing the version threshold per table.</param>
/// <returns>One <see cref="DataChange"/> per event table, in configuration order.</returns>
/// <exception cref="SqlException">Rethrown after being logged when the query fails.</exception>
private IEnumerable<DataChange> DiscoverDataChangesUsingEventTables(SqlDatabaseConnectorJobConfiguration config, DatabaseJobState stateBase)
{
    foreach (var eventTable in config.EventTables)
    {
        var dataChange = new DataChange(new TableDetail()
        {
            TableName = eventTable.TableName,
            PrimaryKeyName = eventTable.MainTableIdColumnName
        });

        using (var conn = new SqlConnection(config.ConnectionString))
        using (var cmd = conn.CreateCommand())
        {
            // NOTE(review): table/column names and the threshold are formatted directly into
            // the SQL text; presumably they all originate from trusted configuration - confirm.
            cmd.CommandText = string.Format(
                "SELECT {0}, {1}, {2} FROM {3} WHERE {0} > {4} ORDER BY {0} asc",
                eventTable.EventSequenceColumnName,
                eventTable.MainTableIdColumnName,
                eventTable.EventTypeColumnName,
                eventTable.TableName,
                stateBase.GetChangetableVersionThreshold(eventTable.TableName));

            Log.Logger.Debug($"{config.JobName}: Getting data changes from changetables with: {cmd.CommandText}");

            try
            {
                conn.Open();
                using (var reader = cmd.ExecuteReader())
                {
                    while (reader.Read())
                    {
                        dataChange.AddChange(DataRowToChangeTableEvent(reader, eventTable));
                    }
                }
            }
            catch (SqlException e)
            {
                // The exception goes first so the logger records it as the exception (with
                // stack trace) rather than as an unused message template argument.
                Log.Logger.Error(e, $"{config.JobName}: SQL error when executing {cmd.CommandText}");
                throw;
            }
        }

        yield return dataChange;
    }
}
/// <summary>
/// Handles a job in the IncrementalCrawling state. If a work task is available it is
/// processed; otherwise the connector is reset when configured to, or a new incremental
/// discover is started.
/// </summary>
/// <param name="configuration">Configuration of the job.</param>
/// <param name="jobStateBase">The state of the job; its batch count and status are updated when a task is processed.</param>
/// <returns>
/// The documents of the next work task, empty changes after a reset, or the result of the
/// incremental discover.
/// </returns>
private SourceChanges HandleIncrementalCrawling(SqlDatabaseConnectorJobConfiguration configuration, DatabaseJobState jobStateBase)
{
    // Try to pick up the next batch of ids that is waiting to be processed.
    var nextTask = _workTaskService.GetNextTask(configuration.JobName);

    if (nextTask == null)
    {
        return ResetIfConfigured(configuration, jobStateBase)
            ? new SourceChanges()
            : FetchDocumentsUsingIncremental(configuration, jobStateBase);
    }

    Log.Logger.Information($"{jobStateBase.Name}: State in INCREMENTAL CRAWLING phase (still {jobStateBase.BatchCount} batches left)");

    // Never let the remaining batch counter drop below zero.
    if (jobStateBase.BatchCount > 0)
    {
        jobStateBase.BatchCount--;
    }

    jobStateBase.Status = JobStatus.Ok;
    _stateService.SaveState(jobStateBase);

    return FetchDocumentsUsingWorkTask(configuration, nextTask);
}
/// <summary>
/// Discovers all changes listed in the configured change tables, yielding one
/// <see cref="DataChange"/> per table. Each table is queried with CHANGETABLE (CHANGES ...)
/// using the version threshold stored in the job state, ordered by SYS_CHANGE_VERSION.
/// </summary>
/// <param name="config">Job configuration providing the connection string and the change tables.</param>
/// <param name="stateBase">The job state providing the version threshold per table.</param>
/// <returns>One <see cref="DataChange"/> per change table, produced lazily.</returns>
private IEnumerable<DataChange> DiscoverDataChangesUsingChangetables(SqlDatabaseConnectorJobConfiguration config, DatabaseJobState stateBase)
{
    foreach (var table in config.ChangeTables)
    {
        var tableChanges = new DataChange(table);

        using (var connection = new SqlConnection(config.ConnectionString))
        {
            using (var command = connection.CreateCommand())
            {
                command.CommandText = string.Format(
                    "SELECT * FROM CHANGETABLE (CHANGES {0}, {1}) as CT ORDER BY SYS_CHANGE_VERSION asc",
                    table.TableName,
                    stateBase.GetChangetableVersionThreshold(table.TableName));

                Log.Logger.Debug($"{config.JobName}: Getting data changes from changetables with: {command.CommandText}");

                try
                {
                    connection.Open();
                    using (var rows = command.ExecuteReader())
                    {
                        while (rows.Read())
                        {
                            var changeEvent = DataRowToChangeTableEvent(rows, table);
                            tableChanges.AddChange(changeEvent);
                        }
                    }
                }
                catch (SqlException sqlError)
                {
                    // Log the failing statement, then let the caller deal with the exception.
                    Log.Error(sqlError, $"{config.JobName}: SQL error when executing {command.CommandText}");
                    throw;
                }
            }
        }

        yield return tableChanges;
    }
}