/// <summary>
/// Aborts the ESR blocks that belong to failed hosts and, if that leaves any
/// exchange workload behind, hands the workload to a new child
/// <c>FailoverInfo</c> that runs on its own background thread.
/// </summary>
/// <exception cref="Exception">
/// Thrown when the number of bad hosts has reached the replication factor.
/// </exception>
internal void RehashESRBlocksFromFailedHosts()
{
    int smallestBlockCount = 0;
    List<string> pendingWorkload = new List<string>();
    AbortESRBlocksFromFailedHosts(pendingWorkload, out smallestBlockCount);

    if (badHosts.Count >= failoverShared.dc.Replication)
    {
        throw new Exception("The number of machines removed >= Replication factor");
    }

    // Nothing was collected from the failed hosts, so there is nothing to rehash.
    if (pendingWorkload.Count == 0)
    {
        return;
    }

    // Register the child under the current cID with status 0; the child reports
    // back through UpdateBlockStatus(childCid, 1) when it finishes.
    lock (blockStatus)
    {
        blockStatus.Add(cID, 0);
    }

    FailoverInfo child = new FailoverInfo(failoverShared.dc);
    childFailovers.Add(cID, child);

    // Step down through primes below the reported minimum block count, skipping
    // the value that equals the configured zblocks count.
    int rehashBlockCount = smallestBlockCount;
    do
    {
        rehashBlockCount = AELight.NearestPrimeLE(rehashBlockCount - 1);
    }
    while (rehashBlockCount == failoverShared.dc.slave.zblocks.count);

#if FAILOVER_DEBUG
    Log("rehashing...newblockcount:" + rehashBlockCount.ToString() + ";cid:" + cID.ToString());
#endif

    string[] healthyHosts = new List<string>(goodHosts.Keys).ToArray();
    string[] failedHosts = new List<string>(badHosts.Keys).ToArray();
    child.CreateBlocks(rehashBlockCount, pendingWorkload, null, null, null, failoverShared, healthyHosts, failedHosts, true); // last argument: rehash=true

    // Capture the id in a local: cID is incremented below, possibly before the
    // child thread gets to read it.
    int childCid = cID;
    System.Threading.ThreadStart childEntry = delegate()
    {
        child.ExecOneMapReduceFailover(this, childCid);
    };
    System.Threading.Thread worker = new System.Threading.Thread(childEntry);
    worker.IsBackground = true;
    worker.Start();

    cID++;
}
/// <summary>
/// Runs one map pass followed by one exchange/sort/reduce (ESR) pass over this
/// instance's blocks, polling for host failures while the block threads run.
/// Any exception is stored in <c>LastException</c> and logged rather than
/// rethrown; the parent, if any, is always told that this run has finished.
/// </summary>
/// <param name="parent">
/// The failover that spawned this one, or null. On success this instance's
/// host-to-ESR-block map is handed to it via <c>AddHostToESRBlocks</c>.
/// </param>
/// <param name="myCID">
/// The id under which this run is reported to <paramref name="parent"/> through
/// <c>UpdateBlockStatus(myCID, 1)</c>.
/// </param>
internal void ExecOneMapReduceFailover(FailoverInfo parent, int myCID)
{
#if FAILOVER_DEBUG
    Log("Begin ExecOneMapReduceFailover: parent=" + (parent == null ? "null" : "has parent") + "; myCID=" + myCID.ToString());
#endif

    try
    {
        if (allBlocks == null)
        {
            throw new Exception("FailoverInfo allBlocks is null.");
        }

        RunMapPhaseWithHealthChecks();

        if (failoverShared.verbose)
        {
            Console.WriteLine((failoverShared.extraverbose ? "\r\n" : "") + " [{0}] Map done; starting map exchange", System.DateTime.Now.ToString());
            ConsoleFlush();
        }

        // All map threads are joined: hand out the zmapblock paths, then run ESR.
        AssignZMapBlockWorkloads(CollectGoodZMapBlockPaths());
        RunESRPhaseWithHealthChecks();
        JoinESRBlocksAndThrowOnFailure();

#if FAILOVER_DEBUG
        Log("no esr exceptions...");
#endif

        // ALL DONE. Append blocks to parent only if everything is ok.
        if (parent != null)
        {
#if FAILOVER_DEBUG
            Log("Adding my esrblocks to parent:");
            {
                string debugtxt = "my esrblocks:" + Environment.NewLine;
                foreach (KeyValuePair<string, List<MapReduceBlockInfo>> pair in hostToESRBlocks)
                {
                    debugtxt += pair.Key + ":" + pair.Value.Count.ToString() + Environment.NewLine;
                }
                Log(debugtxt);
            }
#endif
            parent.AddHostToESRBlocks(hostToESRBlocks);
        }
    }
    catch (Exception e)
    {
        LastException = e;
        try
        {
            CloseAllBlocks();
        }
        catch
        {
            // Cleanup failures are ignored on purpose so they cannot mask the
            // original error, which is recorded above and logged below.
        }
        LogOutput("ExecOneMapReduceFailover error: " + e.ToString());
#if FAILOVER_DEBUG
        Log("ExecOneMapReduceFailover error: " + e.ToString());
#endif
    }

    // Report completion to the parent whether or not an exception occurred.
    if (parent != null)
    {
#if FAILOVER_DEBUG
        Log("UpdateBlockStatus mycID: " + myCID.ToString());
#endif
        parent.UpdateBlockStatus(myCID, 1);
    }

#if FAILOVER_DEBUG
    {
        string debugtxt = "Exiting ExecOneMapReduceFailover...Final esrblocks:" + Environment.NewLine;
        foreach (KeyValuePair<string, List<MapReduceBlockInfo>> pair in hostToESRBlocks)
        {
            debugtxt += "host=" + pair.Key + ":" + pair.Value.Count.ToString() + Environment.NewLine;
            foreach (MapReduceBlockInfo bl in pair.Value)
            {
                debugtxt += "blockid=" + bl.BlockID.ToString() + ";blockcid=" + bl.BlockCID.ToString() + Environment.NewLine
                    + "owned=" + bl.ownedzmapblocks + Environment.NewLine;
            }
        }
        Log(debugtxt);
    }
#endif
}

/// <summary>
/// Starts a map thread for every block, polls until all are reported complete
/// (running periodic health checks), joins the healthy threads, and swaps failed
/// working blocks for a healthy replica.
/// </summary>
private void RunMapPhaseWithHealthChecks()
{
#if TESTFAULTTOLERANT
    PauseWhileTestFileExists(@"FAILOVER_TEST: before map threads start", @"c:\temp\failovertest1.txt");
#endif

    foreach (MapReduceBlockInfo bl in allBlocks)
    {
        bl.all = workingBlocks;
        bl.thread = new System.Threading.Thread(new System.Threading.ThreadStart(bl.firstthreadproc));
        bl.thread.Name = "MapReduceJobBlock" + bl.BlockID + "_map";
        bl.thread.IsBackground = true;
        AELight_StartTraceThread(bl.thread);
    }

    uint sleepCnt = 0;
    awakeCnt = 0;
    for (; ; )
    {
#if FAILOVER_DEBUG
        Log("Loop at map. Sleepcnt=" + sleepCnt.ToString());
        LogBlockStatusSnapshot("map");
#endif
        // NOTE(review): 1 is presumably the "finished" status, matching the
        // UpdateBlockStatus(myCID, 1) call in the entry point - confirm.
        if (AllBlocksCompleted(1))
        {
#if FAILOVER_DEBUG
            Log("All map blocks completed. Breaking out of map loop...");
#endif
            break;
        }

        System.Threading.Thread.Sleep(failoverShared.dc.FailoverTimeout);

        // Only run a health check after more than FailoverDoCheck sleeps.
        if (sleepCnt++ > failoverShared.dc.FailoverDoCheck)
        {
            sleepCnt = 0;
#if FAILOVER_DEBUG
            Log("Health check at map loop;awakeCnt=" + awakeCnt.ToString());
#endif
            if (CheckHealthMap(false))
            {
#if FAILOVER_DEBUG
                {
                    Log("Disk failure detected at map loop...");
                    string debugtxt = "======Bad hosts found=======" + Environment.NewLine
                        + string.Join(";", new List<string>(newBadHostToReason.Keys).ToArray());
                    Log(debugtxt);
                }
#endif
                AbortBlocksOnNewBadHosts();
#if FAILOVER_DEBUG
                {
                    Log("Done removing all bad blocks at map loop");
                    string debugtxt = "";
                    Log("========failover.allblocks========");
                    foreach (MapReduceBlockInfo bl in allBlocks)
                    {
                        debugtxt += Environment.NewLine + "****blockid=" + bl.BlockID.ToString() + ";blockcid=" + bl.BlockCID.ToString()
                            + ";dfdetected=" + bl.diskfailuredetected.ToString() + ";host=" + bl.SlaveHost + Environment.NewLine + Environment.NewLine;
                    }
                    Log(debugtxt);
                }
                LogBlockStatusSnapshot("map");
                Log("hostToBlocksCount=" + hostToBlocks.Count.ToString());
#endif
            }
            awakeCnt++;
        }
    }

#if TESTFAULTTOLERANT
    PauseWhileTestFileExists(@"FAILOVER_TEST: after map threads joined", @"c:\temp\failovertest2.txt");
#endif

    // One more health check after the loop, before trusting any map output.
    if (CheckHealthMap(true))
    {
        AbortBlocksOnNewBadHosts();
    }

    // Threads of blocks flagged with a disk failure are not joined.
    for (int bi = 0; bi < allBlocks.Length; bi++)
    {
        MapReduceBlockInfo bl = allBlocks[bi];
        if (!bl.diskfailuredetected)
        {
            AELight_JoinTraceThread(bl.thread);
        }
    }
#if FAILOVER_DEBUG
    Log("All map blocks joined.");
#endif

    if (badHosts.Count >= failoverShared.dc.Replication)
    {
        throw new Exception("Error: Cannot continue to exchange/sort/reduce phase. The number of machines removed (" + (badHosts.Count).ToString() + ") is greater than or equal to replication factor (" + failoverShared.dc.Replication.ToString() + ").");
    }

    ReplaceFailedWorkingBlocksWithReplicas();

#if FAILOVER_DEBUG
    {
        Log("=======Blocks going forward to exchange=========");
        string debugtxt = "";
        foreach (MapReduceBlockInfo bl in workingBlocks)
        {
            debugtxt += Environment.NewLine + "****blockid=" + bl.BlockID.ToString() + ";blockcid=" + bl.BlockCID.ToString()
                + ";host=" + bl.SlaveHost + ";dfdetected=" + bl.diskfailuredetected.ToString() + Environment.NewLine;
        }
        Log(debugtxt);
    }
#endif
}

/// <summary>
/// Displays the newly detected bad hosts and aborts the blocks of each of them.
/// </summary>
private void AbortBlocksOnNewBadHosts()
{
    DisplayNewBadHosts();
    foreach (string bh in newBadHostToReason.Keys)
    {
        AbortBlocksFromFailedHost(bh);
    }
}

/// <summary>
/// Replaces every failed entry of <c>workingBlocks</c> with the first healthy
/// replica of the same block found in <c>allBlocks</c>.
/// </summary>
/// <exception cref="Exception">Thrown when a failed block has no healthy replica.</exception>
private void ReplaceFailedWorkingBlocksWithReplicas()
{
    for (int bi = 0; bi < workingBlocks.Count; bi++)
    {
        MapReduceBlockInfo bl = workingBlocks[bi];
        if (!bl.diskfailuredetected && !bl.blockfail)
        {
            continue;
        }

        bool foundgoodblock = false;
        for (int ri = 0; ri < failoverShared.dc.Replication - 1; ri++)
        {
            // Replica (ri + 1) of a block is looked up one workingBlocks.Count
            // stride further into allBlocks, starting from the block's BlockCID.
            int nextrepblockcid = (ri + 1) * workingBlocks.Count + bl.BlockCID;
            MapReduceBlockInfo nextrepblock = allBlocks[nextrepblockcid];
            if (!nextrepblock.diskfailuredetected && !nextrepblock.blockfail)
            {
                foundgoodblock = true;
                workingBlocks[bi] = nextrepblock;
                break;
            }
        }

        if (!foundgoodblock)
        {
            throw new Exception("Error: Cannot find a good replicated map block to replace the failed block. Block index = " + bi.ToString());
        }
    }
}

/// <summary>
/// Builds, per BlockID, the '*'-separated list of zmapblock network paths from
/// every block that has neither a disk failure nor a block failure.
/// </summary>
/// <returns>
/// An array of length <c>blockCount</c>; an entry stays null when no healthy
/// block exists for that BlockID.
/// </returns>
private string[] CollectGoodZMapBlockPaths()
{
    string[] zmapblocks = new string[blockCount];
    foreach (MapReduceBlockInfo bl in allBlocks)
    {
        if (bl.diskfailuredetected || bl.blockfail)
        {
            continue;
        }

        string existing = zmapblocks[bl.BlockID];
        string path = Surrogate.NetworkPathForHost(bl.SlaveHost) + @"\" + bl.acl.GetZMapBlockBaseName();
        zmapblocks[bl.BlockID] = (existing == null) ? path : existing + "*" + path;
    }
    return zmapblocks;
}

/// <summary>
/// Gives each working block the paths for its own BlockID plus a ';'-separated
/// list of the paths for all other BlockIDs.
/// </summary>
/// <param name="zmapblocks">Per-BlockID path lists, indexed by BlockID.</param>
private void AssignZMapBlockWorkloads(string[] zmapblocks)
{
    for (int wi = 0; wi < workingBlocks.Count; wi++)
    {
        MapReduceBlockInfo wb = workingBlocks[wi];
        wb.ownedzmapblocks = zmapblocks[wb.BlockID];

        System.Text.StringBuilder remotezms = new System.Text.StringBuilder();
        for (int zi = 0; zi < zmapblocks.Length; zi++)
        {
            if (zi == wb.BlockID)
            {
                continue;
            }
            // The separator is only skipped while nothing has been written yet,
            // so a null entry after the first non-empty one still gets its ';'.
            if (remotezms.Length > 0)
            {
                remotezms.Append(";");
            }
            remotezms.Append(zmapblocks[zi]);
        }
        wb.remotezmapblocks = remotezms.ToString();
    }
}

/// <summary>
/// Resets the block status table for the working blocks, starts one ESR thread
/// per working block, and polls until everything is reported complete and a
/// final health check finds no new failure. Detected failures are handed to
/// <c>RehashESRBlocksFromFailedHosts</c>.
/// </summary>
private void RunESRPhaseWithHealthChecks()
{
    // blockStatus is guarded by lock (blockStatus) everywhere else it is touched,
    // and map threads of failed hosts were never joined, so reset it under the
    // same lock.
    lock (blockStatus)
    {
        blockStatus.Clear();
        for (int bi = 0; bi < workingBlocks.Count; bi++)
        {
            MapReduceBlockInfo bl = workingBlocks[bi];
            blockStatus.Add(bl.BlockCID, 0);

            string host = bl.SlaveHost.ToLower();
            List<MapReduceBlockInfo> hostBlocks;
            if (!hostToESRBlocks.TryGetValue(host, out hostBlocks))
            {
                hostBlocks = new List<MapReduceBlockInfo>();
                hostToESRBlocks.Add(host, hostBlocks);
            }
            hostBlocks.Add(bl);
        }
    }

#if TESTFAULTTOLERANT
    PauseWhileTestFileExists(@"FAILOVER_TEST: before esr threads start", @"c:\temp\failovertest3.txt");
#endif

    for (int bi = 0; bi < workingBlocks.Count; bi++)
    {
        MapReduceBlockInfo bl = workingBlocks[bi];
        bl.thread = new System.Threading.Thread(new System.Threading.ThreadStart(bl.exchangethreadproc));
        bl.thread.Name = "MapReduceJobBlock" + bl.BlockID + "_aftermap";
        bl.thread.IsBackground = true;
        AELight_StartTraceThread(bl.thread);
    }

    uint sleepCnt = 0;
    awakeCnt = 0;
    for (; ; )
    {
#if FAILOVER_DEBUG
        Log("Loop at esr. SleepCnt=" + sleepCnt.ToString());
        LogBlockStatusSnapshot("esr");
#endif
        if (AllBlocksCompleted(1))
        {
#if FAILOVER_DEBUG
            Log("All esr blocks completed.");
#endif
#if TESTFAULTTOLERANT
            PauseWhileTestFileExists(@"FAILOVER_TEST: all esr blocks completed, before breaking out of loop", @"c:\temp\failovertest4.txt");
#endif
            // Do one more check before breaking out of the loop.
            if (!CheckHealthESR(true))
            {
                break;
            }
#if FAILOVER_DEBUG
            Log("df detected before breaking out of esr loop");
#endif
            sleepCnt = 0;
            DisplayNewBadHosts();
            RehashESRBlocksFromFailedHosts();
        }

        System.Threading.Thread.Sleep(failoverShared.dc.FailoverTimeout);

        if (sleepCnt++ > failoverShared.dc.FailoverDoCheck)
        {
            sleepCnt = 0;
#if FAILOVER_DEBUG
            Log("Health check at esr loop;awakeCnt=" + awakeCnt.ToString());
#endif
            if (CheckHealthESR(false))
            {
                DisplayNewBadHosts();
                RehashESRBlocksFromFailedHosts();
            }
            awakeCnt++;
        }
    }
#if FAILOVER_DEBUG
    Log("all esr joined...");
#endif
}

/// <summary>
/// Joins the ESR threads of all working blocks without a disk failure and
/// throws if one of them failed or if any child failover recorded an exception.
/// </summary>
/// <exception cref="Exception">
/// Thrown for a failed working block that has a recorded thread exception, or
/// for a child failover whose <c>LastException</c> is set.
/// </exception>
private void JoinESRBlocksAndThrowOnFailure()
{
    Exception blockError = null;
    foreach (MapReduceBlockInfo bl in workingBlocks)
    {
        if (bl.diskfailuredetected)
        {
            continue;
        }
        AELight_JoinTraceThread(bl.thread);
        if (bl.blockfail)
        {
            // Keeps the exception of the last failed block; all threads are
            // still joined before anything is thrown.
            blockError = bl.LastThreadException;
        }
    }
    if (blockError != null)
    {
        throw new Exception("ESR workingblock error: " + blockError.ToString());
    }

    foreach (FailoverInfo failover in childFailovers.Values)
    {
        if (failover.LastException != null)
        {
            throw new Exception("childFailovers.count=" + childFailovers.Count.ToString() + ";Child failover error: " + failover.LastException.ToString());
        }
    }
}

#if FAILOVER_DEBUG
/// <summary>
/// Logs every entry of <c>blockStatus</c>, read under its lock.
/// </summary>
/// <param name="phase">Phase name put in the header line ("map" or "esr").</param>
private void LogBlockStatusSnapshot(string phase)
{
    string debugtxt = "==========Blockstatus at " + phase + "==========" + Environment.NewLine;
    lock (blockStatus)
    {
        foreach (KeyValuePair<int, int> pair in blockStatus)
        {
            debugtxt += "****blockcid=" + pair.Key.ToString() + ";status=" + pair.Value.ToString() + Environment.NewLine;
        }
    }
    Log(debugtxt);
}
#endif

#if TESTFAULTTOLERANT
/// <summary>
/// Fault-injection hook: prints <paramref name="message"/> and then blocks,
/// printing "z" every 10 seconds, for as long as the marker file exists.
/// </summary>
/// <param name="message">Line written to the console before waiting.</param>
/// <param name="markerPath">File whose existence keeps this method waiting.</param>
private void PauseWhileTestFileExists(string message, string markerPath)
{
    Console.WriteLine(message);
    while (System.IO.File.Exists(markerPath))
    {
        Console.Write("z");
        System.Threading.Thread.Sleep(10000);
    }
}
#endif