/// <summary>
/// Rebuilds the crashed replica with index <paramref name="failedId"/> inside this manager.
/// A new replica is created and loaded with the last state stored for it in
/// <c>otherReplicasStates</c>, upstream operators are asked to resend, and the input
/// replicas, output replicas and puppet master are told to re-route the failed
/// address to <c>SelfURL</c>.
/// </summary>
/// <param name="failedId">
/// Index of the crashed replica; used to index <c>otherReplicasStates</c>,
/// <c>allReplicas</c> and <c>adresses</c>.
/// </param>
public void Recover(int failedId)
{
    Replica r = CreateReplica(failedId);

    // Last state recorded for the crashed replica.
    ReplicaState repState = otherReplicasStates[failedId];
    Dictionary<string, OriginState> os = repState.InputStreamsIds;

    Console.WriteLine($"Started to recover replica {failedId} from state {repState}");
    r.LoadState(repState);
    AddReplica(r);
    allReplicas[failedId] = this;

    // Phase 1: ask every other operator listed in the recovered state to resend.
    foreach (string opName in os.Keys)
    {
        // Only keeps the last id sent to each destination.
        var sentIds = os[opName].SentIds;

        if (opName == this.info.ID)
        {
            continue;
        }

        for (int j = 0; j < sentIds.Count; j++)
        {
            // NOTE(review): this retries forever with no delay and on any exception type.
            // Kept as-is because inputReplicas is filled asynchronously by the constructor,
            // so a failing lookup may succeed later - confirm whether a backoff is wanted.
            while (true)
            {
                try
                {
                    Console.WriteLine($"Asking for resend to {opName} ({j})");
                    inputReplicas[opName][j].Resend(sentIds[j], this.info.ID, failedId, j, SelfURL);
                    break;
                }
                catch (Exception e)
                {
                    Console.WriteLine("Resending failed. Trying again. Stay positive." + e.Message);
                }
            }
        }
    }

    // Phase 2: input replicas re-route the failed address to this manager.
    // Failures are logged and skipped so one unreachable peer does not abort recovery.
    foreach (string opName in inputReplicas.Keys)
    {
        for (int i = 0; i < inputReplicas[opName].Count; i++)
        {
            try
            {
                inputReplicas[opName][i].ReRoute(this.adresses[failedId], this.SelfURL);
            }
            catch (Exception e)
            {
                Console.WriteLine("ReplicaManager.Recover: Reroute of input replicas failed " + e.Message);
            }
        }
    }

    // Phase 3: output replicas re-route the failed address to this manager.
    foreach (string opName in outputReplicas.Keys)
    {
        for (int i = 0; i < outputReplicas[opName].Count; i++)
        {
            try
            {
                outputReplicas[opName][i].ReRoute(this.adresses[failedId], this.SelfURL);
            }
            catch (Exception e)
            {
                Console.WriteLine("ReplicaManager.Recover: Reroute of output replicas failed" + e.Message);
            }
        }
    }

    // Phase 4: the puppet master re-routes as well.
    try
    {
        puppetMaster.ReRoute(this.info.ID, failedId, this.SelfURL);
    }
    catch (Exception e)
    {
        Console.WriteLine("ReplicaManager.Recover: Reroute of puppet master failed" + e.Message);
    }

    // Updated only after the re-route calls above, which pass the old address.
    adresses[failedId] = SelfURL;
    Console.WriteLine("MISSION COMPLETED: all recovered!");

    if (repState.IsFrozen)
    {
        r.Freeze();
    }

    // Read the shared dictionary under the same lock the other accessors use.
    bool shouldStart;
    lock (replicas)
    {
        shouldStart = replicas.First().Value.processingState;
    }

    if (shouldStart)
    {
        r.Start();
    }

    r.Init(); // resend
}
/// <summary>
/// Creates a manager around a single local replica.
/// Initializes <paramref name="rep"/>, registers it under its ID, seeds the stored state of
/// every address with the replica's current state, wires the failure detector to
/// <c>OnFail</c>, starts a background task that resolves the remote stubs, starts the
/// periodic state-propagation timer and obtains the puppet master proxy.
/// </summary>
/// <param name="rep">Local replica managed by this instance.</param>
/// <param name="info">Operator description: addresses, input replicas, output operators and master URL.</param>
public ReplicaManager(Replica rep, OperatorInfo info)
{
    rep.Init();

    this.SelfURL = rep.SelfURL;
    this.replicas = new Dictionary<int, Replica> { { rep.ID, rep } };
    this.adresses = info.Addresses;
    this.info = info;
    this.inputReplicas = new Dictionary<string, List<IReplica>>();
    this.outputReplicas = new Dictionary<string, List<IReplica>>();

    // Every address slot starts out holding this replica's current state.
    var seedState = rep.GetState();
    this.otherReplicasStates = Enumerable.Repeat(seedState, adresses.Count).ToList();

    this.pfd = new PerfectFailureDetector();
    this.pfd.NodeFailed += OnFail;

    // Stub resolution runs in the background; the task is not awaited here.
    Task.Run(async () =>
    {
        // This replica's own address is replaced by null so no stub is requested for it;
        // its slot is filled with this manager right after.
        var remoteAddresses = info.Addresses
            .Select(url => url == rep.SelfURL ? null : url)
            .ToList();
        this.allReplicas = (await Helper.GetAllStubs<IReplica>(remoteAddresses)).ToList();
        allReplicas[rep.ID] = this;

        for (int idx = 0; idx < info.Addresses.Count; idx++)
        {
            if (idx != rep.ID)
            {
                pfd.StartMonitoringNewNode(info.Addresses[idx], allReplicas[idx]);
            }
        }

        foreach (var upstream in info.InputReplicas.Keys)
        {
            var stubs = await Helper.GetAllStubs<IReplica>(info.InputReplicas[upstream]);
            this.inputReplicas[upstream] = stubs.ToList();
        }

        foreach (var downstream in info.OutputOperators)
        {
            var stubs = await Helper.GetAllStubs<IReplica>(downstream.Addresses);
            this.outputReplicas[downstream.ID] = stubs.ToList();
        }
    });

    propagateStateTimer = new Timer(state =>
    {
        // Snapshot the ids under the lock, then propagate outside of it.
        List<int> managedIds;
        lock (replicas)
        {
            managedIds = replicas.Keys.ToList();
        }

        foreach (int managedId in managedIds)
        {
            PropagateState(managedId);
        }
    }, null, PROPAGATE_STATE_PERIOD, PROPAGATE_STATE_PERIOD);

    puppetMaster = (ILogger)Activator.GetObject(typeof(ILogger), info.MasterURL);
    Console.Title = $"{rep.OperatorId} ({rep.ID})";
}