// Round-robin assigns each block in 'scrambled' to a host, recording the placement in
// hostToBlockIDs (cumulative across replication passes) and hostToBlockIDsPerReplication
// (per-pass). A block whose BlockID is already present on the candidate host is a
// collision (two replicas of the same BlockID must not share a host) and is deferred
// into 'collisions' for the caller to retry. 'nexthost' is a ref cursor so the
// round-robin position carries over between successive calls.
// If NO block could be placed (every one collided), a swap with a randomly chosen
// other host is attempted to break the deadlock; failing that, throws.
// NOTE(review): 'hosts' entries are lowercased here; assumes the same casing discipline
// is used by all other writers of these dictionaries — appears consistent in this file.
internal void AssignBlocksToHosts(MapReduceBlockInfo[] scrambled, string[] hosts, ref int nexthost, Dictionary<string, Dictionary<int, MapReduceBlockInfo>> hostToBlockIDs, List<MapReduceBlockInfo> collisions, Dictionary<string, Dictionary<int, MapReduceBlockInfo>> hostToBlockIDsPerReplication, int blocksperhost, int blockscount)
{
    for (int bi = 0; bi < scrambled.Length; bi++)
    {
        string thishost = hosts[nexthost].ToLower();
        MapReduceBlockInfo block = scrambled[bi];
        // Get (or lazily create) the set of BlockIDs already placed on this host.
        Dictionary<int, MapReduceBlockInfo> blocksalreadyonhost = null;
        if (hostToBlockIDs.ContainsKey(thishost))
        {
            blocksalreadyonhost = hostToBlockIDs[thishost];
        }
        else
        {
            blocksalreadyonhost = new Dictionary<int, MapReduceBlockInfo>(blocksperhost);
            hostToBlockIDs.Add(thishost, blocksalreadyonhost);
        }
        if (blocksalreadyonhost.ContainsKey(block.BlockID))
        {
            // A replica of this BlockID is already on this host: defer, and note that
            // nexthost is NOT advanced on a collision — the next block tries the same host.
            collisions.Add(block);
            continue;
        }
        block.SlaveHost = thishost;
        blocksalreadyonhost.Add(block.BlockID, block);
        if (!hostToBlockIDsPerReplication.ContainsKey(thishost))
        {
            hostToBlockIDsPerReplication.Add(thishost, new Dictionary<int, MapReduceBlockInfo>(blockscount));
        }
        hostToBlockIDsPerReplication[thishost].Add(block.BlockID, block);
        //move to the next host only if done assigning
        if (++nexthost >= hosts.Length)
        {
            nexthost = 0;
        }
    }
    //swap with another host if nothing was assigned, we are stuck at one host, and this host has all the blockids from the collision list.
    if (collisions.Count == scrambled.Length)
    {
#if FAILOVER_DEBUG
        Log("Swap begins... collisioncount=" + collisions.Count.ToString() + ";scrambledcount=" + scrambled.Length.ToString());
#endif
        // Take the first stuck block and try to trade it with a block owned by some
        // other host 'phost' such that neither side ends up with a duplicate BlockID.
        MapReduceBlockInfo xblock = collisions[0];
        string thishost = hosts[nexthost].ToLower();
        Dictionary<int, MapReduceBlockInfo> blocksalreadyonhost = null;
        if (hostToBlockIDs.ContainsKey(thishost))
        {
            blocksalreadyonhost = hostToBlockIDs[thishost];
        }
        else
        {
            blocksalreadyonhost = new Dictionary<int, MapReduceBlockInfo>(blocksperhost);
            hostToBlockIDs.Add(thishost, blocksalreadyonhost);
        }
        // Start probing hosts from a random position so swap targets are spread out.
        int phostindex = rnd.Next() % hosts.Length;
        for (int hi = 0; hi < hosts.Length; hi++)
        {
            string phost = hosts[phostindex].ToLower();
            if (++phostindex >= hosts.Length)
            {
                phostindex = 0;
            }
            // Candidate host must have blocks from the current replication pass to give away.
            if (!hostToBlockIDsPerReplication.ContainsKey(phost))
            {
                continue;
            }
            // Candidate host must be able to accept xblock without duplicating its BlockID.
            if (hostToBlockIDs.ContainsKey(phost))
            {
                if (hostToBlockIDs[phost].ContainsKey(xblock.BlockID))
                {
                    continue;
                }
            }
            // Find a block on phost (from this pass) whose BlockID is not yet on thishost.
            MapReduceBlockInfo blocktoswap = null;
            Dictionary<int, MapReduceBlockInfo> pblocks = hostToBlockIDsPerReplication[phost];
            foreach (MapReduceBlockInfo pblock in pblocks.Values)
            {
                if (!blocksalreadyonhost.ContainsKey(pblock.BlockID))
                {
                    blocktoswap = pblock;
                    break;
                }
            }
            if (blocktoswap == null)
            {
                continue;
            }
            //swap
            {
                // Move blocktoswap: phost -> thishost (both bookkeeping maps updated).
                hostToBlockIDs[phost].Remove(blocktoswap.BlockID);
                pblocks.Remove(blocktoswap.BlockID);
                blocktoswap.SlaveHost = thishost;
                blocksalreadyonhost.Add(blocktoswap.BlockID, blocktoswap);
                if (!hostToBlockIDsPerReplication.ContainsKey(thishost))
                {
                    hostToBlockIDsPerReplication.Add(thishost, new Dictionary<int, MapReduceBlockInfo>(blockscount));
                }
                hostToBlockIDsPerReplication[thishost].Add(blocktoswap.BlockID, blocktoswap);
                // Move xblock: collision list -> phost.
                xblock.SlaveHost = phost;
                hostToBlockIDs[phost].Add(xblock.BlockID, xblock);
                pblocks.Add(xblock.BlockID, xblock);
                collisions.RemoveAt(0);
#if FAILOVER_DEBUG
                Log("Swap done");
#endif
                break; //done
            }
        }
        // If the swap resolved nothing (collisions list is still full), give up.
        if (collisions.Count == scrambled.Length)
        {
            throw new Exception("Cannot resolve collisions");
        }
        // Advance past the stuck host so the caller's retry starts elsewhere.
        if (++nexthost >= hosts.Length)
        {
            nexthost = 0;
        }
    }
}
// Builds the full set of map/reduce blocks for a fault-tolerant job run:
//   1. Creates blockcount * Replication MapReduceBlockInfo instances (allBlocks),
//      where BlockCID = ri * blockcount + BlockID for replication index ri.
//   2. Assigns every block to a host via AssignBlocksToHosts (retrying collisions),
//      guaranteeing replicas of the same BlockID land on distinct hosts.
//   3. Configures each block's ArrayComboList (acl) from job/cluster settings.
//   4. Orders the map input chunks ("next" = as given, "shuffle" = randomized) and
//      distributes them round-robin over the first replication set, then mirrors
//      (with per-replica scrambling of the '*'-separated node alternatives) onto
//      the remaining replication sets.
// mapinputoffsets appears to hold, per input file name, the starting index of that
// file's chunks within mapinputfilepaths — TODO confirm against the caller.
// Mutates extensive instance state (allBlocks, hostToBlocks, blockStatus, workingBlocks,
// goodHosts, badHosts, cID, ...). Returns early after SetFailure() if MemCache is
// requested, since MemCache cannot be combined with fault-tolerant execution here.
internal void CreateBlocks(int blockcount, List<string> mapinputfilepaths, List<string> mapinputfilenames, List<int> mapinputoffsets, List<int> mapinputreclengths, FailoversShared failovershared, string[] goodhosts, string[] badhosts, bool rehash)
{
#if FAILOVER_DEBUG
    {
        string debugtxt = Environment.NewLine + "Begin CreateBlocks:" + Environment.NewLine + "blockcount=" + blockcount.ToString() + Environment.NewLine + "rehash=" + rehash.ToString() + Environment.NewLine + "goodhosts=" + string.Join(";", goodhosts) + Environment.NewLine + "badhosts=" + string.Join(";", badhosts) + Environment.NewLine;
        Log(debugtxt);
    }
#endif
    blockCount = blockcount;
    failoverShared = failovershared;
    int allblockscount = blockcount * failovershared.dc.Replication;
    // Ceiling division: blocks per host when spread over all good hosts.
    int blocksperhost = allblockscount / goodhosts.Length;
    if ((blocksperhost * goodhosts.Length) != allblockscount)
    {
        blocksperhost++;
    }
    // Reset all per-run bookkeeping collections.
    hostToBlocks = new Dictionary<string, List<MapReduceBlockInfo>>(goodhosts.Length);
    allBlocks = new MapReduceBlockInfo[allblockscount];
    goodHosts = new Dictionary<string, int>(goodhosts.Length);
    blockStatus = new Dictionary<int, int>(allblockscount);
    badHosts = new Dictionary<string, int>(goodhosts.Length + badhosts.Length);
    newBadHostToReason = new Dictionary<string, string>(goodhosts.Length);
    workingBlocks = new List<MapReduceBlockInfo>(blockcount);
    childFailovers = new Dictionary<int, FailoverInfo>(failovershared.dc.Replication);
    hostToESRBlocks = new Dictionary<string, List<MapReduceBlockInfo>>(goodhosts.Length);
    foreach (string host in goodhosts)
    {
        goodHosts.Add(host.ToLower(), 0);
    }
    foreach (string host in badhosts)
    {
        badHosts.Add(host.ToLower(), 0);
    }
    // Create every block (all replicas); BlockCID is the global index into allBlocks.
    for (int ri = 0; ri < failovershared.dc.Replication; ri++)
    {
        for (int bi = 0; bi < blockcount; bi++)
        {
            MapReduceBlockInfo block = new MapReduceBlockInfo();
            block.BlockID = bi;
            block.BlockCID = ri * blockcount + block.BlockID;
            cID = block.BlockCID;
            block.rehash = rehash;
            block.failover = this;
            allBlocks[block.BlockCID] = block;
            blockStatus.Add(block.BlockCID, 0);
        }
    }
    // cID is left one past the last assigned BlockCID.
    cID++;
    // firstset will hold replication index 0's blocks in their scrambled order;
    // map inputs are distributed over this set and mirrored to the other replicas.
    MapReduceBlockInfo[] firstset = new MapReduceBlockInfo[blockcount];
    {
        Dictionary<string, Dictionary<int, MapReduceBlockInfo>> hostToBlockIDs = new Dictionary<string, Dictionary<int, MapReduceBlockInfo>>(goodhosts.Length);
        int nexthost = 0;
        List<MapReduceBlockInfo> collisions = new List<MapReduceBlockInfo>(blockcount);
        Dictionary<string, Dictionary<int, MapReduceBlockInfo>> hostToBlockIDsPerRep = new Dictionary<string, Dictionary<int, MapReduceBlockInfo>>(goodhosts.Length);
        for (int ri = 0; ri < failovershared.dc.Replication; ri++)
        {
#if FAILOVER_DEBUG
            Log("Assigning hosts to blocks in replication index = " + ri.ToString());
#endif
            hostToBlockIDsPerRep.Clear();
            MapReduceBlockInfo[] scrambled = new MapReduceBlockInfo[blockcount]; //!
            for (int bi = 0; bi < scrambled.Length; bi++)
            {
                scrambled[bi] = allBlocks[bi + ri * blockcount];
            }
            // Fisher-Yates-style random swaps (with replacement) to randomize order.
            for (int bi = 0; bi < scrambled.Length; bi++)
            {
                int rndindex = rnd.Next() % scrambled.Length;
                MapReduceBlockInfo oldvalue = scrambled[bi];
                scrambled[bi] = scrambled[rndindex];
                scrambled[rndindex] = oldvalue;
            }
            if (ri == 0)
            {
                for (int bi = 0; bi < scrambled.Length; bi++)
                {
                    firstset[bi] = scrambled[bi];
                }
            }
#if FAILOVER_DEBUG
            {
                /*string debugtxt = "firstset:" + Environment.NewLine; foreach (MapReduceBlockInfo bl in firstset) { debugtxt += bl.BlockID.ToString() + ":" + bl.BlockCID.ToString() + ":" + (bl.SlaveHost == null ? "null" : bl.SlaveHost) + Environment.NewLine; } Log(debugtxt);*/
            }
#endif
            // Keep retrying the collision remainder; bail out after blockcount attempts.
            int tryremains = blockcount;
            for (; ; )
            {
                AssignBlocksToHosts(scrambled, goodhosts, ref nexthost, hostToBlockIDs, collisions, hostToBlockIDsPerRep, blocksperhost, blockcount);
                if (collisions.Count == 0)
                {
                    break;
                }
                if (--tryremains <= 0)
                {
                    throw new Exception("Cannot resolve collisions. Reached maximum number of tries.");
                }
                scrambled = collisions.ToArray();
                collisions.Clear();
            }
        }
        // Build the host -> blocks index from the final assignments.
        foreach (MapReduceBlockInfo block in allBlocks)
        {
            string host = block.SlaveHost.ToLower();
            if (!hostToBlocks.ContainsKey(host))
            {
                hostToBlocks.Add(host, new List<MapReduceBlockInfo>(blocksperhost));
            }
            hostToBlocks[host].Add(block);
        }
    }
    // Shared per-job settings referenced by every block.
    MapReduceBlockInfo.JobBlocksShared jobshared = new MapReduceBlockInfo.JobBlocksShared();
    jobshared.noutputmethod = failovershared.cfgj.IOSettings.OutputMethod;
    jobshared.blockcount = blockcount;
    jobshared.ExecOpts = failovershared.execopts;
    // Configure each block and its ArrayComboList from job/cluster configuration.
    foreach (MapReduceBlockInfo block in allBlocks)
    {
        block.jobshared = jobshared;
        block.allinputsamples = null;
        block.extraverbose = failovershared.extraverbose;
        block.AddCacheOnly = false;
        block.outputfiles = failovershared.outputfiles;
        block.outputfile = failovershared.outputfile;
        block.basefilesize = failovershared.dc.DataNodeBaseSize;
        block.cfgj = failovershared.cfgj;
        block.SlaveIP = IPAddressUtil.GetIPv4Address(block.SlaveHost);
        block.ExecArgs = failovershared.execargs;
        block.logname = failovershared.logname;
        block.acl = new MySpace.DataMining.DistributedObjects5.ArrayComboList(failovershared.cfgj.NarrativeName + "_BlockID" + block.BlockID.ToString(), failovershared.cfgj.IOSettings.KeyLength);
        block.acl.SetJID(jid);
        block.acl.HealthPluginPaths = healthpluginpaths;
        block.acl.FaultTolerant = true;
        block.acl.FTReadTimeout = failovershared.dc.slave.FaultTolerantReadTimeout;
        block.acl.FTReadRetries = failovershared.dc.slave.FaultTolerantReadRetries;
        block.acl.FTConnectRetries = failovershared.dc.slave.FaultTolerantConnectRetries;
        // Job-level IntermediateDataAddressing overrides the cluster default; 0 means unset.
        int IntermediateDataAddressing = failovershared.cfgj.IntermediateDataAddressing;
        if (0 == IntermediateDataAddressing)
        {
            IntermediateDataAddressing = failovershared.dc.IntermediateDataAddressing;
        }
        // Bits -> bytes; must be at least one byte.
        block.acl.ValueOffsetSize = IntermediateDataAddressing / 8;
        if (block.acl.ValueOffsetSize <= 0)
        {
            throw new InvalidOperationException("Invalid value for IntermediateDataAddressing: " + IntermediateDataAddressing.ToString());
        }
        block.acl.PartialReduce = (failovershared.cfgj.PartialReduce != null);
        block.acl.InputRecordLength = MySpace.DataMining.DistributedObjects.StaticGlobals.DSpace_InputRecordLength;
        block.acl.OutputRecordLength = MySpace.DataMining.DistributedObjects.StaticGlobals.DSpace_OutputRecordLength;
        block.acl.OutputRecordLengths = failovershared.outputrecordlengths;
        block.acl.InputRecordLengths = new List<int>();
        block.acl.CookRetries = failovershared.dc.slave.CookRetries;
        block.acl.CookTimeout = failovershared.dc.slave.CookTimeout;
        // Only block 0 compiles locally; presumably the others reuse its output — TODO confirm.
        block.acl.LocalCompile = (0 == block.BlockID);
        block.acl.BTreeCapSize = failovershared.dc.BTreeCapSize;
        MySpace.DataMining.DistributedObjects5.DistObject.FILE_BUFFER_SIZE = FILE_BUFFER_SIZE;
        block.acl.atype = atype;
        block.acl.DfsSampleDistance = failovershared.dc.DataNodeBaseSize / failovershared.dc.DataNodeSamples;
        block.slaveconfigxml = failovershared.slaveconfigxml;
        block.acl.CompressFileOutput = failovershared.dc.slave.CompressDfsChunks;
        block.acl.ZMapBlockCount = blockcount;
        block.verbose = failovershared.verbose;
        block.acl.CompilerOptions = failovershared.cfgj.IOSettings.CompilerOptions;
        block.acl.CompilerVersion = failovershared.cfgj.IOSettings.CompilerVersion;
        if (failovershared.cfgj.AssemblyReferencesCount > 0)
        {
            failovershared.cfgj.AddAssemblyReferences(block.acl.CompilerAssemblyReferences, Surrogate.NetworkPathForHost(block.SlaveHost));
        }
        if (failovershared.cfgj.OpenCVExtension != null)
        {
            block.acl.AddOpenCVExtension();
        }
        if (failovershared.cfgj.MemCache != null)
        {
            // MemCache is incompatible with fault-tolerant execution: report and abort.
            //block.acl.AddMemCacheExtension();
            Console.Error.WriteLine(" MemCache cannot be enabled with fault-tolerant execution" + "; job '{0}' has <MemCache/> tag", failovershared.cfgj.NarrativeName);
            SetFailure();
            return;
        }
        if (failovershared.cfgj.Unsafe != null)
        {
            block.acl.AddUnsafe();
        }
        block.gencodectx();
        block.acl.AddBlock("1", "1", block.SlaveHost + @"|" + (failovershared.cfgj.ForceStandardError != null ? "&" : "") + block.logname + @"|slaveid=0");
        block.ownedzmapblockIDs.Add(block.BlockID);
        // Only replication index 0 (BlockCID < blockcount) is initially "working".
        if (block.BlockCID < blockcount)
        {
            workingBlocks.Add(block);
        }
    }
#if FAILOVER_DEBUG
    /*{ Log("mapinputfilepaths"); string debugtxt = ""; foreach (string xx in mapinputfilepaths) { debugtxt += xx + Environment.NewLine; } Log(debugtxt); } if (mapinputoffsets != null) { Log("mapinputoffsets"); string debugtxt = ""; foreach (int xx in mapinputoffsets) { debugtxt += xx.ToString() + Environment.NewLine; } Log(debugtxt); } if (mapinputfilenames != null) { Log("mapinputfilenames"); string debugtxt = ""; foreach (string xx in mapinputfilenames) { debugtxt += xx + Environment.NewLine; } Log(debugtxt); } if (mapinputreclengths != null) { Log("mapinputreclengths"); string debugtxt = ""; foreach (int xx in mapinputreclengths) { debugtxt += xx.ToString() + Environment.NewLine; } Log(debugtxt); }*/
#endif
    {
        // Working copies of the input lists, possibly shuffled per MapInputOrder.
        List<string> _mapinputfilepaths = null;
        List<string> _mapinputfilenames = null;
        List<int> _mapinputoffsets = null;
        List<int> _mapinputreclengths = null;
        if (string.Compare("next", failoverShared.cfgj.FaultTolerantExecution.MapInputOrder, StringComparison.OrdinalIgnoreCase) == 0)
        {
            // "next": keep the caller-provided order; alias the originals directly.
            _mapinputfilepaths = mapinputfilepaths;
            _mapinputfilenames = mapinputfilenames;
            _mapinputoffsets = mapinputoffsets;
            _mapinputreclengths = mapinputreclengths;
        }
        else if (string.Compare("shuffle", failoverShared.cfgj.FaultTolerantExecution.MapInputOrder, StringComparison.OrdinalIgnoreCase) == 0)
        {
            if (!rehash)
            {
                // Expand the per-file metadata so every chunk carries its own filename and
                // record length, then shuffle all three lists with the same permutation.
                _mapinputfilepaths = new List<string>(mapinputfilepaths.Count);
                _mapinputfilenames = new List<string>(mapinputfilepaths.Count);
                _mapinputoffsets = new List<int>(mapinputfilepaths.Count);
                _mapinputreclengths = new List<int>(mapinputfilepaths.Count);
                for (int ci = 0; ci < mapinputfilepaths.Count; ci++)
                {
                    _mapinputfilepaths.Add(mapinputfilepaths[ci]);
                    _mapinputoffsets.Add(ci);
                }
                for (int oi = 0; oi < mapinputoffsets.Count; oi++)
                {
                    string fname = mapinputfilenames[oi];
                    int reclen = mapinputreclengths[oi];
                    // Chunk count for this file = next file's start offset (or end) minus chunks emitted so far.
                    int expand = (oi == mapinputoffsets.Count - 1 ? mapinputfilepaths.Count : mapinputoffsets[oi + 1]);
                    expand = expand - _mapinputfilenames.Count;
                    for (int ei = 0; ei < expand; ei++)
                    {
                        _mapinputfilenames.Add(fname);
                        _mapinputreclengths.Add(reclen);
                    }
                }
                for (int ci = 0; ci < _mapinputfilepaths.Count; ci++)
                {
                    int rndindex = rnd.Next() % _mapinputfilepaths.Count;
                    string oldchunk = _mapinputfilepaths[ci];
                    _mapinputfilepaths[ci] = _mapinputfilepaths[rndindex];
                    _mapinputfilepaths[rndindex] = oldchunk;
                    string oldfname = _mapinputfilenames[ci];
                    _mapinputfilenames[ci] = _mapinputfilenames[rndindex];
                    _mapinputfilenames[rndindex] = oldfname;
                    int oldreclen = _mapinputreclengths[ci];
                    _mapinputreclengths[ci] = _mapinputreclengths[rndindex];
                    _mapinputreclengths[rndindex] = oldreclen;
                }
            }
            else
            {
                // Rehash: filenames/offsets/reclengths are not used; shuffle only the paths.
                _mapinputfilepaths = new List<string>(mapinputfilepaths.Count);
                for (int ci = 0; ci < mapinputfilepaths.Count; ci++)
                {
                    _mapinputfilepaths.Add(mapinputfilepaths[ci]);
                }
                for (int ci = 0; ci < _mapinputfilepaths.Count; ci++)
                {
                    int rndindex = rnd.Next() % _mapinputfilepaths.Count;
                    string oldchunk = _mapinputfilepaths[ci];
                    _mapinputfilepaths[ci] = _mapinputfilepaths[rndindex];
                    _mapinputfilepaths[rndindex] = oldchunk;
                }
            }
        }
        else
        {
            throw new Exception("Computing InputOrder is not valid");
        }
#if FAILOVER_DEBUG
        /*Log("Done shuffling; MapInputOrder=" + failoverShared.cfgj.Computing.MapInputOrder); { Log("mapinputfilepaths"); string debugtxt = ""; foreach (string xx in mapinputfilepaths) { debugtxt += xx + Environment.NewLine; } Log(debugtxt); } if (mapinputoffsets != null) { Log("mapinputoffsets"); string debugtxt = ""; foreach (int xx in mapinputoffsets) { debugtxt += xx.ToString() + Environment.NewLine; } Log(debugtxt); } if (mapinputfilenames != null) { Log("mapinputfilenames"); string debugtxt = ""; foreach (string xx in mapinputfilenames) { debugtxt += xx + Environment.NewLine; } Log(debugtxt); } if (mapinputreclengths != null) { Log("mapinputreclengths"); string debugtxt = ""; foreach (int xx in mapinputreclengths) { debugtxt += xx.ToString() + Environment.NewLine; } Log(debugtxt); } { Log("_mapinputfilepaths"); string debugtxt = ""; foreach (string xx in _mapinputfilepaths) { debugtxt += xx + Environment.NewLine; } Log(debugtxt); } if (_mapinputoffsets != null) { Log("_mapinputoffsets"); string debugtxt = ""; foreach (int xx in _mapinputoffsets) { debugtxt += xx.ToString() + Environment.NewLine; } Log(debugtxt); } if (_mapinputfilenames != null) { Log("_mapinputfilenames"); string debugtxt = ""; foreach (string xx in _mapinputfilenames) { debugtxt += xx + Environment.NewLine; } Log(debugtxt); } if (_mapinputreclengths != null) { Log("_mapinputreclengths"); string debugtxt = ""; foreach (int xx in _mapinputreclengths) { debugtxt += xx.ToString() + Environment.NewLine; } Log(debugtxt); }*/
#endif
        // Deal input chunks round-robin onto firstset; track which dfs filename each
        // block most recently received so per-block filename/offset lists only grow
        // when the filename changes for that block.
        int firstsetpos = -1;
        MapReduceBlockInfo targetblock = null;
        string[] dfsfilenames = null;
        string curfilename = null;
        int curreclen = 0;
        int curoffset = -1;
        int fi = 0;
        if (!rehash)
        {
            dfsfilenames = new string[blockcount];
            curoffset = _mapinputoffsets[fi];
        }
        for (int mi = 0; mi < _mapinputfilepaths.Count; mi++)
        {
            // Crossing into the next file's chunk range: advance the current file metadata.
            if (curoffset == mi)
            {
                curfilename = _mapinputfilenames[fi];
                curreclen = _mapinputreclengths[fi];
                if (++fi < _mapinputoffsets.Count)
                {
                    curoffset = _mapinputoffsets[fi];
                }
            }
            if (++firstsetpos >= firstset.Length)
            {
                firstsetpos = 0;
            }
            targetblock = firstset[firstsetpos];
            targetblock.mapinputdfsnodes.Add(_mapinputfilepaths[mi]);
            if (!rehash && dfsfilenames[targetblock.BlockID] != curfilename)
            {
                if (targetblock.mapinputfilenames == null)
                {
                    targetblock.mapinputfilenames = new List<string>();
                    targetblock.mapinputnodesoffsets = new List<int>();
                }
                int offset = targetblock.mapinputdfsnodes.Count - 1;
                targetblock.mapinputnodesoffsets.Add(offset);
                targetblock.mapinputfilenames.Add(curfilename);
                targetblock.acl.InputRecordLengths.Add(curreclen);
                dfsfilenames[targetblock.BlockID] = curfilename;
            }
        }
    }
    //scramble each inputfile node * path and assign to the other set of blocks
    {
        foreach (MapReduceBlockInfo block in firstset)
        {
            // Replicas share the first set's filename/offset/reclength lists by reference.
            for (int ri = 1; ri < failovershared.dc.Replication; ri++)
            {
                MapReduceBlockInfo repblock = allBlocks[ri * blockcount + block.BlockID];
                repblock.mapinputnodesoffsets = block.mapinputnodesoffsets;
                repblock.mapinputfilenames = block.mapinputfilenames;
                repblock.acl.InputRecordLengths = block.acl.InputRecordLengths;
            }
            for (int mi = 0; mi < block.mapinputdfsnodes.Count; mi++)
            {
                string mpinput = block.mapinputdfsnodes[mi];
                // Each entry is a '*'-separated list of alternative node locations;
                // rotate it from a random starting point per replica so replicas
                // prefer different nodes for the same chunk.
                string[] parts = mpinput.Split('*');
                for (int ri = 0; ri < failovershared.dc.Replication; ri++)
                {
                    int firsthost = rnd.Next() % parts.Length;
                    string sbmpinput = "";
                    for (int pi = 0; pi < parts.Length; pi++)
                    {
                        if (firsthost >= parts.Length)
                        {
                            firsthost = 0;
                        }
                        if (sbmpinput.Length > 0)
                        {
                            sbmpinput += "*";
                        }
                        sbmpinput += parts[firsthost++];
                    }
                    MapReduceBlockInfo repblock = allBlocks[ri * blockcount + block.BlockID];
                    if (ri == 0)
                    {
                        repblock.mapinputdfsnodes[mi] = sbmpinput; //just replace with scrambled one.
                    }
                    else
                    {
                        repblock.mapinputdfsnodes.Add(sbmpinput);
                    }
                }
            }
        }
    }
#if FAILOVER_DEBUG
    //SANITY CHECK
    //make sure each host doesn't have repeated blockid.
    {
        Dictionary<string, Dictionary<int, MapReduceBlockInfo>> san = new Dictionary<string, Dictionary<int, MapReduceBlockInfo>>();
        foreach (MapReduceBlockInfo bl in allBlocks) { if (!san.ContainsKey(bl.SlaveHost.ToLower())) { san.Add(bl.SlaveHost.ToLower(), new Dictionary<int, MapReduceBlockInfo>()); } san[bl.SlaveHost.ToLower()].Add(bl.BlockID, bl); }
        Log("====== SANITY CHECK PASSED #1 ======");
        Log("hostscount in allBlocks=" + san.Count.ToString());
        string debugtxt = "";
        foreach (string h in san.Keys) { debugtxt += h + ":" + san[h].Values.Count.ToString() + Environment.NewLine; }
        Log(debugtxt);
        foreach (string host in san.Keys) { Log("host " + host + " has " + san[host].Count.ToString() + " blocks"); }
        Dictionary<int, int> repblockcountperhost = new Dictionary<int, int>();
        foreach (string host in san.Keys)
        {
            Dictionary<int, List<MapReduceBlockInfo>> repfactorToBlocks = new Dictionary<int, List<MapReduceBlockInfo>>();
            Dictionary<int, MapReduceBlockInfo> blocks = san[host];
            foreach (MapReduceBlockInfo block in blocks.Values) { int repf = (block.BlockCID - block.BlockID) / blockcount; if (!repfactorToBlocks.ContainsKey(repf)) { repfactorToBlocks.Add(repf, new List<MapReduceBlockInfo>()); } repfactorToBlocks[repf].Add(block); }
            Log("Distribution of blocks in host " + host);
            foreach (int repf in repfactorToBlocks.Keys) { Log("replication index=" + repf.ToString() + "; blocks=" + repfactorToBlocks[repf].Count); if (!repblockcountperhost.ContainsKey(repfactorToBlocks[repf].Count)) { repblockcountperhost.Add(repfactorToBlocks[repf].Count, 0); } }
        }
        string txt = "";
        foreach (int r in repblockcountperhost.Keys) { txt += r.ToString() + ","; }
        Log("Replication blocks count per host:" + txt);
    }
    {
        if (blockcount != workingBlocks.Count) { throw new Exception("blockscount != workingBlocks.Count; blockscount=" + blockcount.ToString() + ";workingBlocks.count=" + workingBlocks.Count.ToString()); }
        Log("====== SANITY CHECK PASSED #2 ======");
    }
    //make sure all blockid and blockcid are correct.
    {
        for (int bi = 0; bi < allBlocks.Length; bi++)
        {
            MapReduceBlockInfo bl = allBlocks[bi];
            if (bi != bl.BlockCID) { throw new Exception("bi doesn't match bl.BlockCID. bi=" + bi.ToString() + ";bl.BlockCID=" + bl.BlockCID.ToString()); }
            if ((bl.BlockCID - bl.BlockID) % blockcount != 0) { throw new Exception("(bl.BlockCID - bl.BlockID) % blockscount != 0. bl.blockcid=" + bl.BlockCID.ToString() + ";bl.blockid=" + bl.BlockID.ToString()); }
        }
        Log("====== SANITY CHECK PASSED #3 ======");
    }
    //check blockstatus
    {
        if (blockStatus.Count != allblockscount) { throw new Exception("blockStatus.Count != allblockscount;blockstatuscount=" + blockStatus.Count.ToString() + ";allblockscount=" + allblockscount.ToString()); }
        List<int> san = new List<int>(blockStatus.Keys);
        san.Sort();
        if (san[0] != 0) { throw new Exception("san[0] != 0; san[0]=" + san[0].ToString()); }
        if (san[allblockscount - 1] != allblockscount - 1) { throw new Exception("san[allblockscount - 1] != allblockscount - 1; san[allblockscount-1]=" + san[allblockscount - 1].ToString() + ";allblockscount=" + allblockscount.ToString()); }
        Log("====== SANITY CHECK PASSED #4 ======");
    }
    //make sure each block id has repfactor number of copies and on different host.
    {
        Dictionary<int, Dictionary<string, MapReduceBlockInfo>> san = new Dictionary<int, Dictionary<string, MapReduceBlockInfo>>();
        foreach (MapReduceBlockInfo bl in allBlocks) { if (!san.ContainsKey(bl.BlockID)) { san.Add(bl.BlockID, new Dictionary<string, MapReduceBlockInfo>()); } san[bl.BlockID].Add(bl.SlaveHost, bl); }
        foreach (int blockid in san.Keys) { Dictionary<string, MapReduceBlockInfo> bls = san[blockid]; if (bls.Count != failovershared.dc.Replication) { throw new Exception("bls.Count != repfactor"); } }
        Log("====== SANITY CHECK PASSED #5 ======");
        {
            string txt = "";
            foreach (int blockid in san.Keys) { txt += blockid.ToString() + Environment.NewLine; Dictionary<string, MapReduceBlockInfo> repbs = san[blockid]; foreach (KeyValuePair<string, MapReduceBlockInfo> pair in repbs) { txt += pair.Value.BlockID.ToString() + ":" + pair.Value.BlockCID.ToString() + ":" + pair.Value.SlaveHost + Environment.NewLine; } }
            Log("Blocks distribution:");
            Log(txt);
        }
    }
    {
        /*Log("Done CreateBlocks()..."); string debugtxt = "==========failover.allBlocks==========" + Environment.NewLine; foreach (MapReduceBlockInfo bl in allBlocks) { debugtxt += Environment.NewLine + "****blockid=" + bl.BlockID.ToString() + ";blockcid=" + bl.BlockCID.ToString() + ";host=" + bl.SlaveHost + Environment.NewLine + string.Join(";", bl.mapinputdfsnodes.ToArray()) + Environment.NewLine; } Log(debugtxt);*/
    }
    {
        /*string debugtxt = "==========Blockstatus==========" + Environment.NewLine; lock (blockStatus) { foreach (KeyValuePair<int, int> pair in blockStatus) { debugtxt += "****blockcid=" + pair.Key.ToString() + ";status=" + pair.Value.ToString() + Environment.NewLine; } } Log(debugtxt);*/
    }
#endif
}