public void CreateSequenceFile(string name)
{
    using (System.IO.StreamWriter sf = System.IO.File.CreateText(name))
    {
        sf.WriteLine("*sequence*");
        for (int i = 0; i < nodes.Length; i++)
        {
            sf.WriteLine(Surrogate.NetworkPathForHost(nodes[i].Host.Split(';')[0])
                + @"\" + nodes[i].Name);
        }
    }
}
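// Illustrative sketch (added; not part of the original source): a minimal consumer
// of the sequence file written above. It assumes only the format shown there: a
// "*sequence*" header line followed by one UNC chunk path per line.
static string[] ReadSequenceFile(string name)
{
    string[] lines = System.IO.File.ReadAllLines(name);
    if (lines.Length == 0 || lines[0] != "*sequence*")
    {
        throw new System.IO.InvalidDataException("Not a sequence file: " + name);
    }
    string[] paths = new string[lines.Length - 1];
    System.Array.Copy(lines, 1, paths, 0, paths.Length);
    return paths;
}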
internal static void _KillMemCache_mt(List<dfs.DfsFile> delfiles, bool verbose)
{
    dfs dc = LoadDfsConfig();
    string[] slaves = dc.Slaves.SlaveList.Split(';');
    foreach (dfs.DfsFile df in delfiles)
    {
        try
        {
            // Unpin shared memory segments on all machines.
            _MemCacheRelease(df, true); // force=true
        }
        catch (System.Threading.ThreadAbortException)
        {
        }
        catch (Exception e)
        {
            if (verbose)
            {
                string msg = e.Message;
                if (-1 == msg.IndexOf("MemCacheWarning"))
                {
                    Console.WriteLine("Warning: {0}", msg);
                }
            }
        }
        foreach (string slave in slaves)
        {
            try
            {
                string mdfp = Surrogate.NetworkPathForHost(slave) + @"\" + df.MemCache.MetaFileName;
                System.IO.File.Delete(mdfp);
            }
            catch
            {
            }
        }
    }
    _KillDataFileChunks_unlocked_mt(delfiles, false);
    if (verbose)
    {
        foreach (dfs.DfsFile df in delfiles)
        {
            Console.WriteLine("Successfully deleted MemCache '{0}' ({1} parts)", df.Name, df.Nodes.Count);
        }
    }
}
System.IO.Stream _OpenStream(dfs.DfsFile.FileNode node)
{
    string[] nodehosts = node.Host.Split(';');
    string[] fullnames = new string[nodehosts.Length];
    int ReplicateCurrentIndex = ReplicateStartIndex;
    for (int i = 0; i < fullnames.Length; i++)
    {
        fullnames[i] = Surrogate.NetworkPathForHost(nodehosts[ReplicateCurrentIndex % nodehosts.Length])
            + @"\" + node.Name;
        ReplicateCurrentIndex++;
    }
    return new MySpace.DataMining.AELight.DfsFileNodeStream(fullnames, true,
        System.IO.FileMode.Open, System.IO.FileAccess.Read, System.IO.FileShare.Read,
        0x400 * 4);
}
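// Illustrative sketch (added; not part of the original source): the replica
// rotation above, in isolation. With hosts "A;B;C" and a start index of 1, the
// candidate order is B, C, A, so different workers start reads on different
// replicas while keeping every replica as a fallback.
static string[] RotateReplicas(string semicolonHosts, int startIndex)
{
    string[] hosts = semicolonHosts.Split(';');
    string[] ordered = new string[hosts.Length];
    for (int i = 0; i < hosts.Length; i++)
    {
        ordered[i] = hosts[(startIndex + i) % hosts.Length];
    }
    return ordered;
}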
static void EnsureReplication(string dfsxmlpath, int replicationfactor)
{
    bool toomany = false;
    Console.WriteLine("Ensure data is replicated...");
    {
        dfs dc = dfs.ReadDfsConfig_unlocked(dfsxmlpath);
        foreach (dfs.DfsFile df in dc.Files)
        {
            if (0 == string.Compare(DfsFileTypes.NORMAL, df.Type, StringComparison.OrdinalIgnoreCase)
                || 0 == string.Compare(DfsFileTypes.BINARY_RECT, df.Type, StringComparison.OrdinalIgnoreCase))
            {
                foreach (dfs.DfsFile.FileNode fn in df.Nodes)
                {
                    string[] nhosts = fn.Host.Split(';', ',');
                    if (nhosts.Length < replicationfactor)
                    {
                        throw new Exception("dfs://" + df.Name + " node " + fn.Name
                            + " does not live on " + replicationfactor.ToString() + " machines");
                    }
                    else if (nhosts.Length > replicationfactor)
                    {
                        if (!toomany)
                        {
                            toomany = true;
                            Console.Error.WriteLine("Warning: too many replicates for one or more DFS file chunks");
                        }
                    }
                    for (int ni = 0; ni < nhosts.Length; ni++)
                    {
                        string np = Surrogate.NetworkPathForHost(nhosts[ni]) + @"\" + fn.Name;
                        if (!System.IO.File.Exists(np))
                        {
                            throw new Exception("dfs://" + df.Name + " node " + fn.Name
                                + " does not actually live on host " + nhosts[ni]
                                + " as indicated by meta-data [" + np + "]");
                        }
                        if (df.HasZsa)
                        {
                            if (!System.IO.File.Exists(np + ".zsa"))
                            {
                                throw new Exception("Sample data for dfs://" + df.Name + " node " + fn.Name
                                    + " (" + fn.Name + ".zsa) does not actually live on host " + nhosts[ni]
                                    + " as indicated by meta-data [" + np + ".zsa]");
                            }
                        }
                    }
                }
            }
        }
    }
}
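// Illustrative sketch (added; not part of the original source): the per-chunk
// validation above reduces to this predicate. The probe delegate stands in for the
// System.IO.File.Exists check against each replica's UNC path.
static bool ChunkHealthy(string hostField, int replicationFactor, Func<string, bool> probe)
{
    string[] hosts = hostField.Split(';', ',');
    if (hosts.Length < replicationFactor)
    {
        return false; // Under-replicated according to meta-data alone.
    }
    foreach (string host in hosts)
    {
        if (!probe(host))
        {
            return false; // Meta-data says the chunk is there; the file isn't.
        }
    }
    return true;
}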
public DfsStream(string dfsfile, bool PreserveOrder, bool MachineLock)
{
    if (MachineLock)
    {
        this.Mutex = new System.Threading.Mutex(false, "DfsStream{24A86864-EED6-4680-AB0E-3BDE97262339}");
        this.Mutex.WaitOne();
    }
    ReplicateStartIndex = StaticGlobals.Qizmt_BlockID;
    surrogatedir = Surrogate.NetworkPathForHost(Surrogate.MasterHost);
    dfs dc = dfs.ReadDfsConfig_unlocked(surrogatedir + @"\" + dfs.DFSXMLNAME);
    this.RetryTimeout = dc.slave.CookTimeout;
    this.RetryCount = dc.slave.CookRetries;
    dfs.DfsFile df = dc.FindAny(dfsfile);
    if (null == df)
    {
        throw new System.IO.FileNotFoundException("DFS file '" + dfsfile + "' not found", dfsfile);
    }
    if (0 != string.Compare(DfsFileTypes.NORMAL, df.Type, StringComparison.OrdinalIgnoreCase)
        && 0 != string.Compare(DfsFileTypes.BINARY_RECT, df.Type, StringComparison.OrdinalIgnoreCase))
    {
        throw new InvalidOperationException("DFS file '" + df.Name
            + "' cannot be opened because file is of type " + df.Type);
    }
    this.reclen = df.RecordLength;
    nodes = df.Nodes.ToArray();
    if (!PreserveOrder)
    {
        Random rnd = new Random(unchecked(
            System.Threading.Thread.CurrentThread.ManagedThreadId
            + DateTime.Now.Millisecond * 351
            + ReplicateStartIndex
            + nodes.Length * 6131));
        // Scramble the node order. Note: swapping each position with a uniformly
        // random index is not a uniform (Fisher-Yates) shuffle, but any scrambling
        // appears to suffice for spreading reads here.
        for (int i = 0; i < nodes.Length; i++)
        {
            int ridx = rnd.Next(0, nodes.Length);
            dfs.DfsFile.FileNode tmpnode = nodes[i];
            nodes[i] = nodes[ridx];
            nodes[ridx] = tmpnode;
        }
    }
}
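// Illustrative sketch (added; not part of the original source): if a uniform
// permutation were ever required, a standard Fisher-Yates pass is the usual
// replacement for the swap loop above.
static void FisherYatesShuffle<T>(T[] items, Random rnd)
{
    for (int i = items.Length - 1; i > 0; i--)
    {
        int j = rnd.Next(0, i + 1); // 0 <= j <= i
        T tmp = items[i];
        items[i] = items[j];
        items[j] = tmp;
    }
}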
internal static void DoDfsCheck(string[] args, bool fix)
{
    if (fix)
    {
        if (args.Length == 0)
        {
            Console.Error.WriteLine(" -all switch required for dfsfix");
            SetFailure();
            return;
        }
    }
    string userspec = null;
    bool userspecchunk = false;
    bool userspecdfsfile = false;
    if (args.Length > 0)
    {
        if (0 != string.Compare(args[0], "-all", true))
        {
            if ('-' == args[0][0])
            {
                Console.Error.WriteLine("Unknown switch: {0}", args[0]);
                SetFailure();
                return;
            }
            userspec = args[0];
            if (userspec.StartsWith("dfs://", StringComparison.OrdinalIgnoreCase))
            {
                userspec = userspec.Substring(6);
                userspecdfsfile = true;
            }
            else if (userspec.StartsWith("zd.", StringComparison.OrdinalIgnoreCase)
                && userspec.EndsWith(".zd", StringComparison.OrdinalIgnoreCase)
                && -1 != userspec.IndexOf('-')
                && userspec.Length > 45)
            {
                userspecchunk = true;
            }
            else
            {
                userspecdfsfile = true;
            }
        }
    }
    dfs dc = LoadDfsConfig();
    if (dc.Files.Count == 0)
    {
        Console.WriteLine("No DFS files to scan");
        return;
    }
    string tempfnpost = "." + Guid.NewGuid().ToString() + "."
        + System.Diagnostics.Process.GetCurrentProcess().Id.ToString();
    string[] hosts = dc.Slaves.SlaveList.Split(';');
    Dictionary<string, StringBuilder> sbdfsfiles = new Dictionary<string, StringBuilder>(StringComparer.OrdinalIgnoreCase);
    foreach (string host in hosts)
    {
        StringBuilder sb = new StringBuilder(1024);
        sbdfsfiles[host] = sb;
    }
    bool addedone = false;
    foreach (dfs.DfsFile df in dc.Files)
    {
        if (0 == string.Compare(DfsFileTypes.NORMAL, df.Type, true)
            || 0 == string.Compare(DfsFileTypes.BINARY_RECT, df.Type, true))
        {
            if (userspecdfsfile)
            {
                if (0 != string.Compare(df.Name, userspec, true))
                {
                    continue;
                }
            }
            foreach (dfs.DfsFile.FileNode fn in df.Nodes)
            {
                if (userspecchunk)
                {
                    if (0 != string.Compare(fn.Name, userspec, true))
                    {
                        continue;
                    }
                }
                foreach (string rhost in fn.Host.Split(';'))
                {
                    StringBuilder sb = sbdfsfiles[rhost];
                    sb.AppendLine(fn.Host + @"?" + df.Name + @"?" + fn.Name);
                }
                addedone = true;
            }
        }
    }
    if (!addedone)
    {
        Console.WriteLine("Nothing to scan");
        return;
    }
    foreach (KeyValuePair<string, StringBuilder> kvp in sbdfsfiles)
    {
        try
        {
            System.IO.File.WriteAllText(Surrogate.NetworkPathForHost(kvp.Key)
                + @"\dfscheck" + tempfnpost, kvp.Value.ToString());
        }
        catch (Exception e)
        {
            Console.Error.WriteLine("Skipping host {0} due to error: {1}", kvp.Key, e.Message);
            LogOutputToFile(e.ToString());
        }
    }
    string jobsfn = "dfschecks-jobs.xml" + tempfnpost;
    try
    {
        using (System.IO.StreamWriter sw = System.IO.File.CreateText(jobsfn))
        {
            sw.Write((@"<SourceCode>
  <Jobs>
    <Job Name=`DfsCheck` Custodian=`` Email=`` Description=``>
      <IOSettings>
        <JobType>remote</JobType>
        <DFS_IO_Multi>
          <DFSReader></DFSReader>
          <DFSWriter></DFSWriter>
          <Mode>ALL MACHINES</Mode>
        </DFS_IO_Multi>
      </IOSettings>
      <Remote>
        <![CDATA[
        int numproblems = 0;
        int numfixed = 0;

        public virtual void Remote(RemoteInputStream dfsinput, RemoteOutputStream dfsoutput)
        {
            const bool fix = " + (fix ? "true" : "false") + @";
            string dfsfilesinfofp = `dfscheck" + tempfnpost + @"`;
            string[] dfsfilesinfos = System.IO.File.ReadAllLines(dfsfilesinfofp);
            if(dfsfilesinfos.Length == 0)
            {
                return;
            }
            MySpace.DataMining.Threading.ThreadTools<string>.Parallel(
                new Action<string>(
                delegate(string dfsfilesinf)
                {
                    string[] qq = dfsfilesinf.Split('?');
                    string chunkhost = qq[0]; // This machine is the first one.
                    string dfsfilename = qq[1];
                    string dfschunkname = qq[2];
                    try
                    {
                        using(System.IO.FileStream fs = new System.IO.FileStream(dfschunkname,
                            System.IO.FileMode.Open, System.IO.FileAccess.Read, System.IO.FileShare.Read))
                        {
                            for(;;)
                            {
                                if(-1 == fs.ReadByte())
                                {
                                    break;
                                }
                            }
                        }
                    }
                    catch
                    {
                        lock(this)
                        {
                            numproblems++;
                        }
                        Qizmt_Log(` DFS file '` + dfsfilename + `' has problem with chunk '` + dfschunkname + `' on host ` + Qizmt_MachineHost);
                        if(fix)
                        {
                            string[] rhosts = chunkhost.Split(';');
                            if(rhosts.Length < 2)
                            {
                                Qizmt_Log(` Not enough replicates`);
                            }
                            else
                            {
                                for(int ir = 1; ir < rhosts.Length; ir++)
                                {
                                    try
                                    {
                                        string newdfschunkname = `zd.dfscheck.` + Guid.NewGuid().ToString() + `.zd`;
                                        System.IO.File.Copy(MySpace.DataMining.AELight.Surrogate.NetworkPathForHost(rhosts[ir]) + @`\` + dfschunkname, newdfschunkname, false);
                                        System.IO.File.Delete(dfschunkname);
                                        System.IO.File.Move(newdfschunkname, dfschunkname);
                                        numfixed++;
                                        break;
                                    }
                                    catch(Exception e)
                                    {
                                        if(ir >= rhosts.Length - 1)
                                        {
                                            Qizmt_Log(` Unable to repair DFS file '` + dfsfilename + `' chunk '` + dfschunkname + `' on host ` + Qizmt_MachineHost + ` because: ` + e.ToString());
                                        }
                                    }
                                }
                            }
                        }
                    }
                }), dfsfilesinfos);
            if(fix)
            {
                Qizmt_Log(` ` + numfixed + ` of ` + numproblems + ` problems fixed on host ` + Qizmt_MachineHost);
            }
            else
            {
                Qizmt_Log(` ` + numproblems + ` problems found on host ` + Qizmt_MachineHost);
            }
        }
        ]]>
      </Remote>
    </Job>
  </Jobs>
</SourceCode>").Replace('`', '"'));
        }
        if (userspecchunk)
        {
            Console.WriteLine("Checking DFS chunk file {0}...", userspec);
        }
        else if (userspecdfsfile)
        {
            Console.WriteLine("Checking DFS file dfs://{0}...", userspec);
        }
        else
        {
            Console.WriteLine("Checking DFS...");
        }
        Exec("", LoadConfig(jobsfn), new string[] { }, false, false);
    }
    finally
    {
        try
        {
            System.IO.File.Delete(jobsfn);
        }
        catch
        {
        }
        foreach (KeyValuePair<string, StringBuilder> kvp in sbdfsfiles)
        {
            try
            {
                System.IO.File.Delete(Surrogate.NetworkPathForHost(kvp.Key) + @"\dfscheck" + tempfnpost);
            }
            catch
            {
            }
        }
    }
}
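// Illustrative sketch (added; not part of the original source): how the entry point
// above is driven. "-all" scans everything; a dfs:// name or a zd.*.zd chunk name
// narrows the scan; fix=true corresponds to the dfsfix behavior. The dfs://mydata
// name is hypothetical.
static void DfsCheckExamples()
{
    DoDfsCheck(new string[] { "-all" }, false);         // check the whole DFS
    DoDfsCheck(new string[] { "dfs://mydata" }, false); // check one DFS file
    DoDfsCheck(new string[] { "-all" }, true);          // check and repair from replicas
}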
public static void ExecOneRemote(SourceCode.Job cfgj, string[] ExecArgs, bool verbose, bool verbosereplication)
{
    if (verbose)
    {
        Console.WriteLine("[{0}] [Remote: {2}]", System.DateTime.Now.ToString(), System.DateTime.Now.Millisecond, cfgj.NarrativeName);
    }
    string logname = Surrogate.SafeTextPath(cfgj.NarrativeName) + "_" + Guid.NewGuid().ToString() + ".j" + sjid + "_log.txt";
    //System.Threading.Thread.Sleep(8000);
    /*if (cfgj.IOSettings.DFS_IOs == null || cfgj.IOSettings.DFS_IOs.Length == 0)
    {
        Console.Error.WriteLine("One or more IOSettings/DFS_IO needed in configuration for 'remote'");
        return;
    }*/
    // Could provide BlockID here, which is just the n-th DFS_IO entry.
    //cfgj.Remote
    dfs dc = LoadDfsConfig();
    string[] slaves = dc.Slaves.SlaveList.Split(',', ';');
    if (dc.Slaves.SlaveList.Length == 0 || slaves.Length < 1)
    {
        throw new Exception("SlaveList expected in " + dfs.DFSXMLNAME);
    }
    if (dc.Replication > 1)
    {
        string[] slavesbefore = slaves;
        slaves = ExcludeUnhealthySlaveMachines(slaves, true).ToArray();
        if (slavesbefore.Length - slaves.Length >= dc.Replication)
        {
            throw new Exception("Not enough healthy machines to run job (hit replication count)");
        }
    }
    if (cfgj.IOSettings.DFS_IO_Multis != null)
    {
        cfgj.ExpandDFSIOMultis(slaves.Length, MySpace.DataMining.DistributedObjects.MemoryUtils.NumberOfProcessors);
    }
    Dictionary<string, int> slaveIDs = new Dictionary<string, int>();
    for (int si = 0; si < slaves.Length; si++)
    {
        slaveIDs.Add(slaves[si].ToUpper(), si);
    }
    bool aborting = false;
    try
    {
        List<RemoteBlockInfo> blocks = new List<RemoteBlockInfo>(cfgj.IOSettings.DFS_IOs.Length);
        if (verbose)
        {
            Console.WriteLine("{0} processes on {1} machines:", cfgj.IOSettings.DFS_IOs.Length, slaves.Length);
        }
        List<string> outputdfsdirs = new List<string>(slaves.Length);
        {
            for (int i = 0; i < slaves.Length; i++)
            {
                try
                {
                    outputdfsdirs.Add(NetworkPathForHost(slaves[i]));
                }
                catch (Exception e)
                {
                    Console.Error.WriteLine(" {0}", e.Message);
                }
            }
        }
        string slaveconfigxml = "";
        {
            System.Xml.XmlDocument pdoc = new System.Xml.XmlDocument();
            {
                System.IO.MemoryStream ms = new System.IO.MemoryStream();
                System.Xml.Serialization.XmlSerializer xs = new System.Xml.Serialization.XmlSerializer(typeof(dfs));
                xs.Serialize(ms, dc);
                ms.Seek(0, System.IO.SeekOrigin.Begin);
                pdoc.Load(ms);
            }
            string xml = pdoc.DocumentElement.SelectSingleNode("./slave").OuterXml;
            //System.Threading.Thread.Sleep(8000);
            slaveconfigxml = xml;
        }
        {
            // Temporary:
            for (int si = 0; si < slaves.Length; si++)
            {
                System.Threading.Mutex m = new System.Threading.Mutex(false, "AEL_SC_" + slaves[si]);
                try
                {
                    m.WaitOne();
                }
                catch (System.Threading.AbandonedMutexException)
                {
                }
                try
                {
                    System.IO.File.WriteAllText(NetworkPathForHost(slaves[si]) + @"\slaveconfig.j" + sjid + ".xml", slaveconfigxml);
                }
                catch
                {
                }
                finally
                {
                    m.ReleaseMutex();
                    m.Close();
                }
            }
        }
        int nextslave = (new Random(DateTime.Now.Millisecond / 2
            + System.Diagnostics.Process.GetCurrentProcess().Id / 2)).Next() % slaves.Length;
        int hosttypes = 0;
        List<int> outputrecordlengths = new List<int>();
        List<int> inputrecordlengths = new List<int>();
        for (int BlockID = 0; BlockID < cfgj.IOSettings.DFS_IOs.Length; BlockID++)
        {
            int slaveHostID = 0;
            RemoteBlockInfo bi = new RemoteBlockInfo();
            bi.sampledist = dc.DataNodeBaseSize / dc.DataNodeSamples;
            bi.BlockID = BlockID;
            bi.blockcount = cfgj.IOSettings.DFS_IOs.Length;
            if (string.IsNullOrEmpty(cfgj.IOSettings.DFS_IOs[BlockID].Host))
            {
                if (0 != hosttypes && 1 != hosttypes)
                {
                    throw new Exception("DFS_IO/Host tag must be specified for all or none");
                }
                hosttypes = 1;
                bi.SlaveHost = slaves[nextslave];
                slaveHostID = nextslave;
                bi.explicithost = false;
            }
            else
            {
                if (0 != hosttypes && 2 != hosttypes)
                {
                    throw new Exception("DFS_IO/Host tag must be specified for all or none");
                }
                hosttypes = 2;
                bi.SlaveHost = cfgj.IOSettings.DFS_IOs[BlockID].Host;
                slaveHostID = slaveIDs[bi.SlaveHost.ToUpper()];
                bi.explicithost = true;
            }
            bi.ExecArgs = ExecArgs;
            if (++nextslave >= slaves.Length)
            {
                nextslave = 0;
            }
            bi.logname = logname;
            bi.outputdfsdirs = outputdfsdirs;
            bi.slaves = slaves;
            bi.baseoutputfilesize = dc.DataNodeBaseSize;
            bi.cfgj = cfgj;
            bi.DFSWriter = cfgj.IOSettings.DFS_IOs[BlockID].DFSWriter.Trim();
            bi.Meta = cfgj.IOSettings.DFS_IOs[BlockID].Meta;
            List<string> dfswriters = new List<string>();
            if (bi.DFSWriter.Length > 0)
            {
                string[] writers = bi.DFSWriter.Split(';');
                for (int wi = 0; wi < writers.Length; wi++)
                {
                    string thiswriter = writers[wi].Trim();
                    if (thiswriter.Length == 0)
                    {
                        continue;
                    }
                    int ic = thiswriter.IndexOf('@');
                    int reclen = -1;
                    if (-1 != ic)
                    {
                        try
                        {
                            reclen = Surrogate.GetRecordSize(thiswriter.Substring(ic + 1));
                            thiswriter = thiswriter.Substring(0, ic);
                        }
                        catch (FormatException e)
                        {
                            Console.Error.WriteLine("Error: remote output record length error: {0} ({1})", thiswriter, e.Message);
                            SetFailure();
                            return;
                        }
                        catch (OverflowException e)
                        {
                            Console.Error.WriteLine("Error: remote output record length error: {0} ({1})", thiswriter, e.Message);
                            SetFailure();
                            return;
                        }
                    }
                    string outfn = thiswriter;
                    if (outfn.StartsWith(@"dfs://", StringComparison.OrdinalIgnoreCase))
                    {
                        outfn = outfn.Substring(6);
                    }
                    string reason = "";
                    if (dfs.IsBadFilename(outfn, out reason))
                    {
                        Console.Error.WriteLine("Invalid output file: {0}", reason);
                        return;
                    }
                    if (null != DfsFindAny(dc, outfn))
                    {
                        Console.Error.WriteLine("Error: output file already exists in DFS: {0}", outfn);
                        return;
                    }
                    dfswriters.Add(thiswriter);
                    outputrecordlengths.Add(reclen);
                }
            }
            else
            {
                dfswriters.Add("");
                outputrecordlengths.Add(-1);
            }
            bi.DFSWriters = dfswriters;
            bi.verbose = verbose;
            bi.rem = new MySpace.DataMining.DistributedObjects5.Remote(cfgj.NarrativeName + "_remote");
            bi.rem.CookRetries = dc.slave.CookRetries;
            bi.rem.CookTimeout = dc.slave.CookTimeout;
            bi.rem.DfsSampleDistance = bi.sampledist;
            bi.rem.CompressFileOutput = dc.slave.CompressDfsChunks;
            bi.rem.LocalCompile = true;
            bi.rem.OutputStartingPoint = slaveHostID;
            bi.rem.CompilerOptions = cfgj.IOSettings.CompilerOptions;
            bi.rem.CompilerVersion = cfgj.IOSettings.CompilerVersion;
            if (cfgj.AssemblyReferencesCount > 0)
            {
                cfgj.AddAssemblyReferences(bi.rem.CompilerAssemblyReferences, Surrogate.NetworkPathForHost(dc.Slaves.GetFirstSlave()));
            }
            if (cfgj.OpenCVExtension != null)
            {
                bi.rem.AddOpenCVExtension();
            }
            if (cfgj.MemCache != null)
            {
                bi.rem.AddMemCacheExtension();
            }
            if (cfgj.Unsafe != null)
            {
                bi.rem.AddUnsafe();
            }
            {
                List<dfs.DfsFile.FileNode> nodes = new List<dfs.DfsFile.FileNode>();
                List<string> mapfileswithnodes = null;
                List<int> nodesoffsets = null;
                IList<string> mapfiles = SplitInputPaths(dc, cfgj.IOSettings.DFS_IOs[BlockID].DFSReader);
                if (mapfiles.Count > 0)
                {
                    mapfileswithnodes = new List<string>(mapfiles.Count);
                    nodesoffsets = new List<int>(mapfiles.Count);
                }
                for (int i = 0; i < mapfiles.Count; i++)
                {
                    string dp = mapfiles[i].Trim();
                    int inreclen = -1;
                    if (0 != dp.Length) // Allow empty entry where input isn't wanted.
                    {
                        if (dp.StartsWith("dfs://", StringComparison.OrdinalIgnoreCase))
                        {
                            dp = dp.Substring(6);
                        }
                        {
                            int ic = dp.IndexOf('@');
                            if (-1 != ic)
                            {
                                try
                                {
                                    inreclen = Surrogate.GetRecordSize(dp.Substring(ic + 1));
                                    dp = dp.Substring(0, ic);
                                }
                                catch (FormatException e)
                                {
                                    Console.Error.WriteLine("Error: remote input record length error: {0} ({1})", dp, e.Message);
                                    SetFailure();
                                    return;
                                }
                                catch (OverflowException e)
                                {
                                    Console.Error.WriteLine("Error: remote input record length error: {0} ({1})", dp, e.Message);
                                    SetFailure();
                                    return;
                                }
                            }
                        }
                        dfs.DfsFile df;
                        if (inreclen > 0 || inreclen == -2)
                        {
                            df = DfsFind(dc, dp, DfsFileTypes.BINARY_RECT);
                            if (null != df && inreclen != df.RecordLength)
                            {
                                Console.Error.WriteLine("Error: remote input file does not have expected record length of {0}: {1}@{2}", inreclen, dp, df.RecordLength);
                                SetFailure();
                                return;
                            }
                        }
                        else
                        {
                            df = DfsFind(dc, dp);
                        }
                        if (null == df)
                        {
                            //throw new Exception("Remote input file not found in DFS: " + dp);
                            Console.Error.WriteLine("Remote input file not found in DFS: {0}", dp);
                            return;
                        }
                        if (df.Nodes.Count > 0)
                        {
                            mapfileswithnodes.Add(dp);
                            nodesoffsets.Add(nodes.Count);
                            inputrecordlengths.Add(inreclen);
                            nodes.AddRange(df.Nodes);
                        }
                    }
                }
                bi.dfsinputpaths = new List<string>(nodes.Count);
                //MapNodesToNetworkPaths(nodes, bi.dfsinputpaths);
                dfs.MapNodesToNetworkStarPaths(nodes, bi.dfsinputpaths);
                bi.dfsinputfilenames = mapfileswithnodes;
                bi.dfsinputnodesoffsets = nodesoffsets;
            }
            blocks.Add(bi);
            bi.thread = new System.Threading.Thread(new System.Threading.ThreadStart(bi.threadproc));
            bi.thread.Name = "RemoteJobBlock" + bi.BlockID;
        }
        MySpace.DataMining.DistributedObjects.StaticGlobals.DSpace_InputRecordLength = inputrecordlengths.Count > 0 ? inputrecordlengths[0] : -1;
        MySpace.DataMining.DistributedObjects.StaticGlobals.DSpace_OutputRecordLength = outputrecordlengths.Count > 0 ? outputrecordlengths[0] : -1;
        // Need to start threads separately due to StaticGlobals being updated.
        for (int BlockID = 0; BlockID < cfgj.IOSettings.DFS_IOs.Length; BlockID++)
        {
            RemoteBlockInfo bi = blocks[BlockID];
            bi.rem.InputRecordLength = MySpace.DataMining.DistributedObjects.StaticGlobals.DSpace_InputRecordLength;
            bi.rem.InputRecordLengths = inputrecordlengths;
            bi.rem.OutputRecordLength = MySpace.DataMining.DistributedObjects.StaticGlobals.DSpace_OutputRecordLength;
            bi.rem.OutputRecordLengths = outputrecordlengths;
            AELight_StartTraceThread(bi.thread);
        }
        for (int BlockID = 0; BlockID < blocks.Count; BlockID++)
        {
            AELight_JoinTraceThread(blocks[BlockID].thread);
            blocks[BlockID].rem.Close();
            if (blocks[BlockID].blockfail)
            {
                Console.Error.WriteLine("BlockID {0} on host '{1}' did not complete successfully", BlockID,
                    (blocks[BlockID].SlaveHost != null) ? blocks[BlockID].SlaveHost : "<null>");
                continue;
            }
        }
        List<string> dfsnames = new List<string>();
        List<string> dfsnamesreplicating = new List<string>();
        // Reload DFS config to make sure changes since starting get rolled in,
        // and make sure the output file wasn't created in that time...
        using (LockDfsMutex()) // Needed: change between load & save should be atomic.
        {
            dc = LoadDfsConfig();
            for (int BlockID = 0; BlockID < blocks.Count; BlockID++)
            {
                if (blocks[BlockID].blockfail)
                {
                    continue;
                }
                {
                    bool anyoutput = false;
                    bool nonemptyoutputpath = false;
                    for (int oi = 0; oi < blocks[BlockID].DFSWriters.Count; oi++)
                    {
                        string dfswriter = blocks[BlockID].DFSWriters[oi];
                        if (string.IsNullOrEmpty(dfswriter))
                        {
                            if (blocks[BlockID].outputdfsnodeses[oi].Count > 0)
                            {
                                Console.Error.WriteLine("Output data detected with no DFSWriter specified");
                            }
                        }
                        else
                        {
                            {
                                if (null != DfsFind(dc, dfswriter))
                                {
                                    Console.Error.WriteLine("Error: output file was created during job: {0}", dfswriter);
                                    continue;
                                }
                                string dfspath = dfswriter;
                                {
                                    nonemptyoutputpath = true;
                                    dfs.DfsFile df = new dfs.DfsFile();
                                    if (blocks[BlockID].rem.OutputRecordLengths[oi] > 0)
                                    {
                                        df.XFileType = DfsFileTypes.BINARY_RECT + "@" + blocks[BlockID].rem.OutputRecordLengths[oi].ToString();
                                    }
                                    else if (blocks[BlockID].rem.OutputRecordLengths[oi] == -2)
                                    {
                                        df.XFileType = DfsFileTypes.BINARY_RECT + "@?";
                                    }
                                    df.Nodes = new List<dfs.DfsFile.FileNode>();
                                    df.Size = -1; // Preset
                                    if (dfspath.StartsWith("dfs://", StringComparison.OrdinalIgnoreCase))
                                    {
                                        dfspath = dfspath.Substring(6);
                                    }
                                    string dfspathreplicating = ".$" + dfspath + ".$replicating-" + Guid.NewGuid().ToString();
                                    if (null != dc.FindAny(dfspathreplicating))
                                    {
                                        Console.Error.WriteLine("Error: file exists: file put into DFS from another location during job: " + dfspathreplicating);
                                        SetFailure();
                                        return;
                                    }
                                    dfsnames.Add(dfspath);
                                    dfsnamesreplicating.Add(dfspathreplicating);
                                    df.Name = dfspathreplicating;
                                    bool anybad = false;
                                    long totalsize = 0;
                                    {
                                        int i = BlockID;
                                        for (int j = 0; j < blocks[i].outputdfsnodeses[oi].Count; j++)
                                        {
                                            dfs.DfsFile.FileNode fn = new dfs.DfsFile.FileNode();
                                            fn.Host = blocks[i].slaves[(blocks[i].rem.OutputStartingPoint + j) % blocks[i].slaves.Count];
                                            fn.Name = blocks[i].outputdfsnodeses[oi][j];
                                            df.Nodes.Add(fn);
                                            fn.Length = -1; // Preset
                                            fn.Position = -1; // Preset
                                            if (anybad)
                                            {
                                                continue;
                                            }
                                            fn.Length = blocks[i].outputsizeses[oi][j];
                                            fn.Position = totalsize; // Position must be set before totalsize updated!
                                            if (blocks[i].outputdfsnodeses[oi].Count != blocks[i].outputsizeses[oi].Count)
                                            {
                                                anybad = true;
                                                continue;
                                            }
                                            totalsize += blocks[i].outputsizeses[oi][j];
                                        }
                                    }
                                    if (!anybad)
                                    {
                                        df.Size = totalsize;
                                    }
                                    if (totalsize != 0)
                                    {
                                        anyoutput = true;
                                    }
                                    // Always add the file to DFS, even if blank!
                                    dc.Files.Add(df);
                                }
                            }
                        }
                    }
                    if (!anyoutput && verbose && nonemptyoutputpath)
                    {
                        Console.Write(" (no DFS output) ");
                        ConsoleFlush();
                    }
                }
            }
            UpdateDfsXml(dc);
        }
        ReplicationPhase(verbosereplication, blocks.Count, slaves, dfsnamesreplicating);
        using (LockDfsMutex()) // Needed: change between load & save should be atomic.
        {
            dc = LoadDfsConfig(); // Reload in case of change or user modifications.
            for (int nfile = 0; nfile < dfsnames.Count; nfile++)
            {
                string dfspath = dfsnames[nfile];
                string dfspathreplicating = dfsnamesreplicating[nfile];
                {
                    dfs.DfsFile dfu = dc.FindAny(dfspathreplicating);
                    if (null != dfu)
                    {
                        if (null != DfsFindAny(dc, dfspath))
                        {
                            Console.Error.WriteLine("Error: file exists: file put into DFS from another location during job");
                            SetFailure();
                            continue;
                        }
                        dfu.Name = dfspath;
                    }
                }
            }
            UpdateDfsXml(dc);
        }
        if (verbose)
        {
            Console.WriteLine(); // Line after output chars.
        }
    }
    catch (System.Threading.ThreadAbortException)
    {
        aborting = true;
    }
    finally
    {
        {
            for (int si = 0; si < slaves.Length; si++)
            {
                System.Threading.Mutex m = new System.Threading.Mutex(false, "AEL_SC_" + slaves[si]);
                try
                {
                    m.WaitOne();
                }
                catch (System.Threading.AbandonedMutexException)
                {
                }
                try
                {
                    System.IO.File.Delete(NetworkPathForHost(slaves[si]) + @"\slaveconfig.j" + sjid + ".xml");
                }
                catch
                {
                }
                finally
                {
                    m.ReleaseMutex();
                    m.Close();
                }
            }
        }
        if (!aborting)
        {
            CheckUserLogs(slaves, logname);
        }
    }
    if (verbose)
    {
        Console.WriteLine();
        Console.WriteLine("[{0}] Done", System.DateTime.Now.ToString(), System.DateTime.Now.Millisecond);
        for (int i = 0; i < cfgj.IOSettings.DFS_IOs.Length; i++)
        {
            Console.WriteLine("Output: {0}", cfgj.IOSettings.DFS_IOs[i].DFSWriter);
        }
    }
}
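// Illustrative sketch (added; not part of the original source): both the reader and
// writer paths above accept a "name@recordlength" suffix. This is that parse in
// isolation, with int.Parse standing in for Surrogate.GetRecordSize (which also
// accepts named sizes).
static void SplitNameAndRecordLength(string spec, out string name, out int reclen)
{
    reclen = -1; // -1 means "no record length given" in the code above.
    name = spec;
    int ic = spec.IndexOf('@');
    if (-1 != ic)
    {
        reclen = int.Parse(spec.Substring(ic + 1)); // May throw FormatException/OverflowException, as handled above.
        name = spec.Substring(0, ic);
    }
    if (name.StartsWith("dfs://", StringComparison.OrdinalIgnoreCase))
    {
        name = name.Substring(6);
    }
}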
bool DoSave()
{
    if (IsReadOnly)
    {
        SetStatus("Cannot save read-only");
        return false;
    }
    string DocText = Doc.Text;
    {
        try
        {
            string fn = PrettyFile;
            if (fn.StartsWith("dfs://", StringComparison.OrdinalIgnoreCase))
            {
                fn = fn.Substring(6);
            }
            dfs dc = Surrogate.ReadMasterDfsConfig();
            string[] slaves = dc.Slaves.SlaveList.Split(';');
            if (0 == slaves.Length || dc.Slaves.SlaveList.Length == 0)
            {
                throw new Exception("DFS SlaveList error (machines)");
            }
            Random rnd = new Random(DateTime.Now.Millisecond / 2 + System.Diagnostics.Process.GetCurrentProcess().Id / 2);
            string newactualfilehost = slaves[rnd.Next() % slaves.Length];
            string newactualfilename = dfs.GenerateZdFileDataNodeName(fn);
            string myActualFile = Surrogate.NetworkPathForHost(newactualfilehost) + @"\" + newactualfilename;
            {
                byte[] smallbuf = new byte[4];
                MySpace.DataMining.DistributedObjects.Entry.ToBytes(4, smallbuf, 0);
                using (System.IO.FileStream fs = new System.IO.FileStream(myActualFile, System.IO.FileMode.CreateNew, System.IO.FileAccess.Write))
                {
                    fs.Write(smallbuf, 0, 4);
                    byte[] buf = Encoding.UTF8.GetBytes(DocText); // Write the snapshot taken above.
                    fs.Write(buf, 0, buf.Length);
                }
                if (IsNewFile)
                {
                    Console.Write(MySpace.DataMining.DistributedObjects.Exec.Shell(
                        "DSpace -dfsbind \"" + newactualfilehost + "\" \"" + newactualfilename
                        + "\" \"" + fn + "\" " + DfsFileTypes.NORMAL + " -h4"));
                }
                else
                {
                    string tempdfsfile = fn + Guid.NewGuid().ToString() + dfs.TEMP_FILE_MARKER;
                    Console.Write(MySpace.DataMining.DistributedObjects.Exec.Shell(
                        "DSpace -dfsbind \"" + newactualfilehost + "\" \"" + newactualfilename
                        + "\" \"" + tempdfsfile + "\" " + DfsFileTypes.NORMAL + " -h4"));
                    MySpace.DataMining.DistributedObjects.Exec.Shell("DSpace swap \"" + tempdfsfile + "\" \"" + fn + "\"");
                    MySpace.DataMining.DistributedObjects.Exec.Shell("DSpace delete \"" + tempdfsfile + "\"", true); // suppresserrors=true
                }
            }
            //ActualFile = myActualFile; // Only update this when fully committed to DFS!
            IsNewFile = false;
        }
        catch (Exception e)
        {
            Console.WriteLine(e.ToString());
            MessageBox.Show(this, "Unable to save new file to DFS:\r\n\r\n" + e.Message,
                "Save-New Error", MessageBoxButtons.OK, MessageBoxIcon.Error);
            return false; // Important! don't continue with any of the rest if this fails!
        }
    }
    Doc.Modified = false; // !
    return true;
}
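// Illustrative sketch (added; not part of the original source): the save path above
// avoids clobbering the live DFS file by binding the new data under a temporary
// name, swapping it with the live name, then deleting the temporary. The same
// stage-then-swap pattern on a plain filesystem looks like this.
static void AtomicReplace(string path, byte[] newContent)
{
    string temp = path + "." + Guid.NewGuid().ToString() + ".tmp";
    System.IO.File.WriteAllBytes(temp, newContent); // Stage the new version first.
    if (System.IO.File.Exists(path))
    {
        System.IO.File.Replace(temp, path, null);   // Swap into place (destination must exist).
    }
    else
    {
        System.IO.File.Move(temp, path);            // Brand-new file, like the IsNewFile branch above.
    }
}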
public static void MemCacheCommand(string[] args)
{
    if (args.Length < 1)
    {
        Console.Error.WriteLine("Expected memcache sub-command");
        SetFailure();
        return;
    }
    string act = args[0].ToLower();
    switch (act)
    {
        case "create":
            {
                string mcname = null;
                string mcschema = null;
                int mcsegsize = -1;
                EachArgument(args, 1, new Action<string, string>(
                    delegate(string key, string value)
                    {
                        key = key.ToLower();
                        switch (key)
                        {
                            case "name":
                                mcname = value;
                                break;
                            case "schema":
                                mcschema = value;
                                break;
                            case "segment":
                            case "segsize":
                            case "segmentsize":
                                mcsegsize = ParseCapacity(value);
                                break;
                        }
                    }));
                if (string.IsNullOrEmpty(mcname))
                {
                    Console.Error.WriteLine("Expected name=<MemCacheName>");
                    SetFailure();
                    return;
                }
                if (string.IsNullOrEmpty(mcschema))
                {
                    Console.Error.WriteLine("Expected schema=<schema>");
                    SetFailure();
                    return;
                }
                if (-1 != mcsegsize && mcsegsize < 1024)
                {
                    Console.Error.WriteLine("Error: segment={0} is too small", mcsegsize);
                    SetFailure();
                    return;
                }
                if (mcname.StartsWith("dfs://", StringComparison.OrdinalIgnoreCase))
                {
                    mcname = mcname.Substring(6);
                }
                {
                    string reason;
                    if (dfs.IsBadFilename(mcname, out reason))
                    {
                        Console.Error.WriteLine("MemCache cannot be named '{0}': {1}", mcname, reason);
                        SetFailure();
                        return;
                    }
                }
                dfs.DfsFile.ConfigMemCache cmc = new dfs.DfsFile.ConfigMemCache();
                cmc.MetaFileName = "mcm." + Surrogate.SafeTextPath(mcname) + ".mcm";
                cmc.Schema = mcschema;
                List<int> offsets = new List<int>();
                cmc.RowLength = Surrogate.GetRecordInfo(mcschema, out cmc.KeyOffset, out cmc.KeyLength, offsets);
                /*if (0 == cmc.KeyOffset
                    && cmc.RowLength == cmc.KeyLength
                    && -1 == mcschema.IndexOf('['))
                {
                    Console.WriteLine("Note: no key was specified, the key is the entire row");
                }*/
                if (-1 == mcsegsize)
                {
                    const int defsegsize = 0x400 * 0x400 * 64;
                    cmc.SegmentSize = defsegsize - (defsegsize % cmc.RowLength);
                }
                else
                {
                    if (0 != (mcsegsize % cmc.RowLength))
                    {
                        Console.Error.WriteLine("Segment size must be a multiple of the row length");
                        Console.Error.WriteLine("Nearest segment size is {0} bytes", mcsegsize - (mcsegsize % cmc.RowLength));
                        SetFailure();
                        return;
                    }
                    cmc.SegmentSize = mcsegsize;
                }
                {
                    StringBuilder sbFieldOffsets = new StringBuilder();
                    foreach (int offset in offsets)
                    {
                        if (sbFieldOffsets.Length != 0)
                        {
                            sbFieldOffsets.Append(',');
                        }
                        sbFieldOffsets.Append(offset);
                    }
                    cmc.FieldOffsets = sbFieldOffsets.ToString();
                }
                dfs.DfsFile df = new dfs.DfsFile();
                df.Nodes = new List<dfs.DfsFile.FileNode>(0);
                df.MemCache = cmc;
                df.Name = mcname;
                df.XFileType = DfsFileTypes.BINARY_RECT + "@" + cmc.RowLength;
                df.Size = 0;
                dfs dc = LoadDfsConfig();
                {
                    dfs.DfsFile df2 = dc.FindAny(df.Name);
                    if (null != df2)
                    {
                        Console.Error.WriteLine("Error: a file named '{0}' already exists", df2.Name);
                        SetFailure();
                        return;
                    }
                }
                {
                    string startmeta = GetMemCacheMetaFileHeader(df);
                    string[] slaves = dc.Slaves.SlaveList.Split(';');
                    int totalworkercount = dc.Blocks.TotalCount; // Subprocess_TotalPrime
                    StringBuilder[] permachine = new StringBuilder[slaves.Length];
                    //byte[] HEADER = new byte[4];
                    //MySpace.DataMining.DistributedObjects.Entry.ToBytes(4, HEADER, 0);
                    for (int i = 0; i < permachine.Length; i++)
                    {
                        permachine[i] = new StringBuilder(256);
                    }
                    {
                        int si = -1;
                        for (int workerid = 0; workerid < totalworkercount; workerid++)
                        {
                            if (++si >= slaves.Length)
                            {
                                si = 0;
                            }
                            StringBuilder sb = permachine[si];
                            sb.AppendFormat("##{1}:{0}", Environment.NewLine, workerid);
                            // There's no segments, but write a dummy one for bookkeeping.
                            foreach (char snc in "MemCache_" + mcname + "_empty")
                            {
                                sb.Append(snc);
                            }
                            {
                                sb.Append(' ');
                                /*
                                StringBuilder newchunkpath = new StringBuilder(100);
                                newchunkpath.Append(Surrogate.NetworkPathForHost(slaves[si]));
                                newchunkpath.Append('\\');
                                */
                                // Make up a data node chunk name.
                                foreach (char ch in MakeMemCacheChunkName(mcname, workerid))
                                {
                                    //newchunkpath.Append(ch);
                                    sb.Append(ch);
                                }
                                // Write the empty chunk.
                                //System.IO.File.WriteAllBytes(newchunkpath.ToString(), HEADER);
                            }
                            //if (IsLastSegment) // true
                            {
                                sb.Append(' ');
                                string shexlen = string.Format("{0:x8}", 0); // Zero-length!
                                for (int i = 0; i < shexlen.Length; i++)
                                {
                                    sb.Append(shexlen[i]);
                                }
                            }
                            sb.AppendLine();
                        }
                    }
                    for (int si = 0; si < slaves.Length; si++)
                    {
                        string slave = slaves[si];
                        string fp = Surrogate.NetworkPathForHost(slave) + @"\" + cmc.MetaFileName;
                        using (System.IO.StreamWriter sw = new System.IO.StreamWriter(fp))
                        {
                            sw.Write(startmeta);
                            sw.Write(permachine[si].ToString());
                        }
                    }
                }
                using (LockDfsMutex())
                {
                    dc = LoadDfsConfig(); // Load again in update lock.
                    {
                        dfs.DfsFile df2 = dc.FindAny(df.Name);
                        if (null != df2)
                        {
                            Console.Error.WriteLine("Error: a file named '{0}' already exists", df2.Name);
                            SetFailure();
                            return;
                        }
                    }
                    dc.Files.Add(df);
                    UpdateDfsXml(dc);
                }
                try
                {
                    // Need to commit it so that the empty chunks are in the metadata for bookkeeping.
                    // This has to be done after actually adding it to dfsxml.
                    MemCacheFlush(mcname);
                }
                catch (Exception e)
                {
                    try
                    {
                        MemCacheDelete(mcname, false);
                    }
                    catch
                    {
                    }
                    Console.Error.WriteLine("Error: unable to commit newly created MemCache '{0}'; because:{1}{2}",
                        mcname, Environment.NewLine, e.ToString());
                    SetFailure();
                    return;
                }
                Console.WriteLine("Successfully created MemCache '{0}'", mcname);
            }
            break;

        case "delete":
        case "del":
        case "rm":
            {
                string mcname = null;
                EachArgument(args, 1, new Action<string, string>(
                    delegate(string key, string value)
                    {
                        key = key.ToLower();
                        switch (key)
                        {
                            case "name":
                                mcname = value;
                                break;
                        }
                    }));
                if (string.IsNullOrEmpty(mcname))
                {
                    Console.Error.WriteLine("Expected name=<MemCacheName>");
                    SetFailure();
                    return;
                }
                MemCacheDelete(mcname, true);
            }
            break;

        case "flush":
        case "commit":
            {
                string mcname = null;
                EachArgument(args, 1, new Action<string, string>(
                    delegate(string key, string value)
                    {
                        key = key.ToLower();
                        switch (key)
                        {
                            case "name":
                                mcname = value;
                                break;
                        }
                    }));
                if (string.IsNullOrEmpty(mcname))
                {
                    Console.Error.WriteLine("Expected name=<MemCacheName>");
                    SetFailure();
                    return;
                }
                try
                {
                    MemCacheFlush(mcname);
                    Console.WriteLine("Done");
                }
                catch (Exception e)
                {
                    Console.WriteLine(" Commit was unsuccessful because: {0}", e.Message);
                    Console.WriteLine();
                    Console.Error.WriteLine(e.ToString());
                    SetFailure();
                    return;
                }
            }
            break;

        case "release":
        case "rollback":
            {
                string mcname = null;
                bool force = false;
                EachArgument(args, 1, new Action<string, string>(
                    delegate(string key, string value)
                    {
                        key = key.ToLower();
                        switch (key)
                        {
                            case "name":
                                mcname = value;
                                break;
                            case "-f":
                                force = true;
                                break;
                        }
                    }));
                if (string.IsNullOrEmpty(mcname))
                {
                    Console.Error.WriteLine("Expected name=<MemCacheName>");
                    SetFailure();
                    return;
                }
                try
                {
                    MemCacheRelease(mcname, force);
                    Console.WriteLine("Done");
                }
                catch (Exception e)
                {
                    string exception = e.ToString();
                    if (-1 != exception.IndexOf("MemCacheWarning"))
                    {
                        Console.WriteLine("Warning: " + exception);
                    }
                    else
                    {
                        Console.Error.WriteLine(exception);
                        string ioe = "InvalidOperationException:";
                        if (!force && -1 != exception.IndexOf(ioe))
                        {
                            try
                            {
                                string emsg = exception.Substring(exception.IndexOf(ioe) + ioe.Length)
                                    .Split('\r', '\n')[0].Trim();
                                System.Threading.Thread.Sleep(100);
                                Console.WriteLine();
                                // Console color escapes are disabled here (both conditions are false).
                                Console.WriteLine("{0}{2}{1}", false ? "\u00014" : "", false ? "\u00010" : "", emsg);
                                System.Threading.Thread.Sleep(100);
                            }
                            catch
                            {
                            }
                            Console.Error.WriteLine("Use rollback -f followed by killall to force rollback");
                        }
                        SetFailure();
                        return;
                    }
                }
            }
            break;

        case "load":
            {
                string mcname = null;
                EachArgument(args, 1, new Action<string, string>(
                    delegate(string key, string value)
                    {
                        key = key.ToLower();
                        switch (key)
                        {
                            case "name":
                                mcname = value;
                                break;
                        }
                    }));
                if (string.IsNullOrEmpty(mcname))
                {
                    Console.Error.WriteLine("Expected name=<MemCacheName>");
                    SetFailure();
                    return;
                }
                MemCacheLoad(mcname);
                Console.WriteLine("Done");
            }
            break;

        case "info":
        case "information":
            {
                string mcname = null;
                EachArgument(args, 1, new Action<string, string>(
                    delegate(string key, string value)
                    {
                        key = key.ToLower();
                        switch (key)
                        {
                            case "name":
                                mcname = value;
                                break;
                        }
                    }));
                if (string.IsNullOrEmpty(mcname))
                {
                    Console.Error.WriteLine("Expected name=<MemCacheName>");
                    SetFailure();
                    return;
                }
                if (mcname.StartsWith("dfs://", StringComparison.OrdinalIgnoreCase))
                {
                    mcname = mcname.Substring(6);
                }
                dfs dc = LoadDfsConfig();
                dfs.DfsFile df = dc.FindAny(mcname);
                if (null == df || df.MemCache == null)
                {
                    Console.Error.WriteLine("Error: '{0}' is not a MemCache", (null == df ? mcname : df.Name));
                    SetFailure();
                    return;
                }
                Console.WriteLine(" MemCache: {0}", df.Name);
                Console.WriteLine(" Segment size: {0} ({1})", GetFriendlyByteSize(df.MemCache.SegmentSize), df.MemCache.SegmentSize);
                Console.WriteLine(" Schema: {0}", df.MemCache.Schema);
                Console.WriteLine(" Row Length: {0}", df.MemCache.RowLength);
                Console.WriteLine(" Key Offset: {0}", df.MemCache.KeyOffset);
                Console.WriteLine(" Key Length: {0}", df.MemCache.KeyLength);
            }
            break;

        default:
            Console.Error.WriteLine("No such sub-command for memcache: {0}", act);
            SetFailure();
            return;
    }
}
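// Illustrative sketch (added; not part of the original source): the segment-size
// rule the create sub-command enforces above. A segment must hold a whole number of
// rows; non-multiples are rejected with the nearest multiple suggested, and the
// default is 64 MiB rounded down to whole rows.
static bool TryComputeSegmentSize(int requestedSize, int rowLength, out int segsize)
{
    if (requestedSize <= 0)
    {
        const int defsegsize = 0x400 * 0x400 * 64; // 64 MiB default
        segsize = defsegsize - (defsegsize % rowLength);
        return true;
    }
    segsize = requestedSize - (requestedSize % rowLength); // nearest multiple at or below
    return segsize == requestedSize; // false mirrors the error path above
}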
public static void ExecOneLocal(SourceCode.Job cfgj, string[] ExecArgs, bool verbose)
{
    if (verbose)
    {
        Console.WriteLine("[{0}] [Local: {2}]", System.DateTime.Now.ToString(), System.DateTime.Now.Millisecond, cfgj.NarrativeName);
    }
    int BlockID = 0;
    string SlaveIP = null;
    string logname = Surrogate.SafeTextPath(cfgj.NarrativeName) + "_" + Guid.NewGuid().ToString() + ".j" + sjid + "_log.txt";
    bool aborting = false;
    try
    {
        dfs dc = LoadDfsConfig();
        string firstslave = dc.Slaves.GetFirstSlave();
        string SlaveHost = cfgj.IOSettings.LocalHost;
        if (SlaveHost == null || SlaveHost.Length == 0)
        {
            SlaveHost = firstslave;
        }
        SlaveIP = IPAddressUtil.GetIPv4Address(SlaveHost);
        MySpace.DataMining.DistributedObjects5.Remote rem = new MySpace.DataMining.DistributedObjects5.Remote(cfgj.NarrativeName + "_local");
        rem.OutputStartingPoint = BlockID;
        rem.LocalCompile = true;
        rem.CompilerOptions = cfgj.IOSettings.CompilerOptions;
        rem.CompilerVersion = cfgj.IOSettings.CompilerVersion;
        if (cfgj.OpenCVExtension != null)
        {
            rem.AddOpenCVExtension();
        }
        if (cfgj.MemCache != null)
        {
            rem.AddMemCacheExtension();
        }
        if (cfgj.Unsafe != null)
        {
            rem.AddUnsafe();
        }
        if (cfgj.AssemblyReferencesCount > 0)
        {
            cfgj.AddAssemblyReferences(rem.CompilerAssemblyReferences, Surrogate.NetworkPathForHost(firstslave));
        }
        rem.SetJID(jid, CurrentJobFileName + " Local: " + cfgj.NarrativeName);
        rem.AddBlock(SlaveHost + @"|" + (cfgj.ForceStandardError != null ? "&" : "") + logname + @"|slaveid=0");
        rem.Open();
        string codectx = (@"
    public const int DSpace_BlockID = " + BlockID.ToString() + @";
    public const int DSpace_ProcessID = DSpace_BlockID;
    public const int Qizmt_ProcessID = DSpace_ProcessID;

    public const int DSpace_BlocksTotalCount = 1;
    public const int DSpace_ProcessCount = DSpace_BlocksTotalCount;
    public const int Qizmt_ProcessCount = DSpace_ProcessCount;

    public const string DSpace_SlaveHost = `" + SlaveHost + @"`;
    public const string DSpace_MachineHost = DSpace_SlaveHost;
    public const string Qizmt_MachineHost = DSpace_MachineHost;

    public const string DSpace_SlaveIP = `" + SlaveIP + @"`;
    public const string DSpace_MachineIP = DSpace_SlaveIP;
    public const string Qizmt_MachineIP = DSpace_MachineIP;

    public static readonly string[] DSpace_ExecArgs = new string[] { " + ExecArgsCode(ExecArgs) + @" };
    public static readonly string[] Qizmt_ExecArgs = DSpace_ExecArgs;

    public const string DSpace_ExecDir = @`" + System.Environment.CurrentDirectory + @"`;
    public const string Qizmt_ExecDir = DSpace_ExecDir;

    static string Shell(string line, bool suppresserrors)
    {
        return MySpace.DataMining.DistributedObjects.Exec.Shell(line, suppresserrors);
    }

    static string Shell(string line)
    {
        return MySpace.DataMining.DistributedObjects.Exec.Shell(line, false);
    }

    const string _userlogname = `" + logname + @"`;
    static System.Threading.Mutex _logmutex = new System.Threading.Mutex(false, `distobjlog`);
    private static int userlogsremain = " + AELight.maxuserlogs.ToString() + @";

    public static void Qizmt_Log(string line) { DSpace_Log(line); }
    public static void DSpace_Log(string line)
    {
        if(--userlogsremain < 0)
        {
            return;
        }
        try
        {
            _logmutex.WaitOne();
        }
        catch (System.Threading.AbandonedMutexException)
        {
        }
        try
        {
            using (System.IO.StreamWriter fstm = System.IO.File.AppendText(_userlogname))
            {
                fstm.WriteLine(`{0}`, line);
            }
        }
        finally
        {
            _logmutex.ReleaseMutex();
        }
    }

    public static void Qizmt_LogResult(string line, bool passed) { DSpace_LogResult(line, passed); }
    public static void DSpace_LogResult(string name, bool passed)
    {
        if(passed)
        {
            DSpace_Log(`[\u00012PASSED\u00010] - ` + name);
        }
        else
        {
            DSpace_Log(`[\u00014FAILED\u00010] - ` + name);
        }
    }
").Replace('`', '"') + CommonDynamicCsCode;
        rem.LocalExec(codectx + cfgj.Local, cfgj.Usings);
        rem.Close();
        if (verbose)
        {
            Console.Write('*');
            ConsoleFlush();
        }
    }
    catch (System.Threading.ThreadAbortException)
    {
        aborting = true;
    }
    finally
    {
        if (!aborting)
        {
            CheckUserLogs(new string[] { SlaveIP }, logname);
        }
    }
    if (verbose)
    {
        Console.WriteLine();
        Console.WriteLine("[{0}] Done", System.DateTime.Now.ToString(), System.DateTime.Now.Millisecond);
    }
}
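// Illustrative sketch (added; not part of the original source): the generated
// source above is written with backticks in place of double quotes so the C#
// literal stays readable, then fixed up with a single Replace('`', '"') call. The
// same trick in miniature:
static string GenerateGreeting(string name)
{
    string code = (@"
    public static string Greet()
    {
        return `Hello, " + name + @"`;
    }
").Replace('`', '"');
    return code; // Emits: return "Hello, <name>";
}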
static void ReplicationFailover(string[] args)
{
    if (args.Length <= 1 || !System.IO.File.Exists(args[1]))
    {
        throw new Exception("Expected path to DFS.xml");
    }
    string dfsxmlpath = args[1];
    string dfsxmlpathbackup = dfsxmlpath + "$" + Guid.NewGuid().ToString();
    string masterdir;
    {
        System.IO.FileInfo fi = new System.IO.FileInfo(dfsxmlpath);
        masterdir = fi.DirectoryName; // Directory's full path.
    }
    Surrogate.SetNewMetaLocation(masterdir);
    dfs olddfs = dfs.ReadDfsConfig_unlocked(dfsxmlpath);
    string masterhost = System.Net.Dns.GetHostName();
    string[] allmachines;
    {
        string[] sl = olddfs.Slaves.SlaveList.Split(';');
        List<string> aml = new List<string>(sl.Length + 1);
        aml.Add(masterhost);
        foreach (string slave in sl)
        {
            if (0 != string.Compare(IPAddressUtil.GetName(slave), IPAddressUtil.GetName(masterhost), StringComparison.OrdinalIgnoreCase))
            {
                aml.Add(slave);
            }
        }
        allmachines = aml.ToArray();
    }
    Console.WriteLine("Backing up DFS.xml to: {0} ...", dfsxmlpathbackup);
    try
    {
        System.IO.File.Delete(dfsxmlpathbackup);
    }
    catch
    {
    }
    System.IO.File.Move(dfsxmlpath, dfsxmlpathbackup);
    try
    {
        {
            // Note: added replication and much lower DataNodeBaseSize!
            Console.WriteLine("Formatting DFS with Replication=3 for test...");
            Exec.Shell("Qizmt @format Machines=" + string.Join(",", allmachines)
                + " Replication=3 DataNodeBaseSize=1048576");
        }
        {
            // Test logic:
            if (allmachines.Length < 3)
            {
                throw new Exception("This test needs a cluster of at least 3 machines!");
            }
            long XBYTES = (long)4194304 * (long)allmachines.Length;
            Console.WriteLine("Generating data...");
            Console.Write(" ");
            Exec.Shell("Qizmt gen data{476D6FE8-D645-41cc-83A1-3AB5E2DE23E7} " + (XBYTES / 4).ToString());
            Console.Write("25%");
            Exec.Shell("Qizmt gen data{61136275-16EC-4ff9-84CE-ACC967550181} " + (XBYTES / 4).ToString());
            Console.Write("..50%");
            Exec.Shell("Qizmt gen data{C76F6C06-EFC8-4808-B214-DB4D167171EB} " + (XBYTES / 2).ToString());
            Console.Write("..100%");
            Console.WriteLine();
            Console.WriteLine("Ensure the cluster is perfectly healthy...");
            EnsurePerfectQizmtHealtha();
            Console.WriteLine("Running job on healthy cluster...");
            string exec_md5;
            {
                string exectempdir = @"\\" + System.Net.Dns.GetHostName()
                    + @"\C$\temp\qizmt\regression_test_ReplicationFailover-" + Guid.NewGuid().ToString();
                if (!System.IO.Directory.Exists(exectempdir))
                {
                    System.IO.Directory.CreateDirectory(exectempdir);
                }
                string execfp = exectempdir + @"\exec{FA19CAB0-5225-4cc8-8728-9BFC3A1B834C}";
                System.IO.File.WriteAllText(execfp, (@"<?xml version=`1.0` encoding=`utf-8`?>
<SourceCode>
  <Jobs>
    <Job Name=`exec{FA19CAB0-5225-4cc8-8728-9BFC3A1B834C}`>
      <IOSettings>
        <JobType>mapreduce</JobType>
        <KeyLength>100</KeyLength>
        <DFSInput>dfs://data{*}</DFSInput>
        <DFSOutput>dfs://output{04454992-E2CD-4342-AEEB-1D0607B32D84}</DFSOutput>
        <KeyMajor>8</KeyMajor>
        <OutputMethod>sorted</OutputMethod>
      </IOSettings>
      <MapReduce>
        <Map>
          <![CDATA[
          public virtual void Map(ByteSlice line, MapOutput output)
          {
              output.Add(line, ByteSlice.Prepare());
          }
          ]]>
        </Map>
        <Reduce>
          <![CDATA[
          public override void Reduce(ByteSlice key, RandomAccessEntries values, RandomAccessOutput output)
          {
              for(int i = 0; i < values.Length; i++)
              {
                  output.Add(key);
              }
          }
          ]]>
        </Reduce>
      </MapReduce>
    </Job>
  </Jobs>
</SourceCode>
").Replace('`', '"'));
                Exec.Shell("Qizmt importdir " + exectempdir);
                try
                {
                    System.IO.File.Delete(execfp);
                    System.IO.Directory.Delete(exectempdir);
                }
                catch
                {
                }
                Exec.Shell("Qizmt exec exec{FA19CAB0-5225-4cc8-8728-9BFC3A1B834C}");
                exec_md5 = DfsSum("md5", "output{04454992-E2CD-4342-AEEB-1D0607B32D84}");
                Exec.Shell("Qizmt del output{04454992-E2CD-4342-AEEB-1D0607B32D84}");
            }
            try
            {
                Console.WriteLine("Disrupting 2 machines...");
                {
                    string badmachine = allmachines[allmachines.Length - 1];
                    Console.WriteLine(" Bad disk on {0}", badmachine);
                    string netpath = Surrogate.NetworkPathForHost(badmachine);
                    foreach (System.IO.FileInfo fi in (new System.IO.DirectoryInfo(netpath)).GetFiles("zd.*.zd"))
                    {
                        if (!fi.Name.StartsWith("zd.!."))
                        {
                            System.IO.File.Move(fi.FullName, fi.DirectoryName + @"\zd.!." + fi.Name.Substring(3));
                        }
                    }
                }
                {
                    string badmachine = allmachines[allmachines.Length - 2];
                    Console.WriteLine(" Bad network connection on {0}", badmachine);
                    Exec.Shell(@"sc \\" + badmachine + @" stop DistributedObjects");
                }
                Console.WriteLine("Ensure the cluster is NOT perfectly healthy...");
                {
                    bool healthy;
                    try
                    {
                        EnsurePerfectQizmtHealtha();
                        healthy = true;
                    }
                    catch
                    {
                        healthy = false;
                    }
                    if (healthy)
                    {
                        throw new Exception("Cluster is still healthy");
                    }
                }
                Console.WriteLine("Running job on unhealthy cluster...");
                {
                    try
                    {
                        Exec.Shell("Qizmt exec exec{FA19CAB0-5225-4cc8-8728-9BFC3A1B834C}");
                    }
                    catch
                    {
                        // Replication will output a warning and throw an exception,
                        // so we need to ignore that exception.
                        // The MD5 check will ensure it ran fine.
                    }
                    string new_exec_md5 = DfsSum("md5", "output{04454992-E2CD-4342-AEEB-1D0607B32D84}");
                    Exec.Shell("Qizmt del output{04454992-E2CD-4342-AEEB-1D0607B32D84}");
                    if (new_exec_md5 != exec_md5)
                    {
                        throw new Exception("Output files from before and after disrupting cluster do not match");
                    }
                }
            }
            finally
            {
                {
                    Console.WriteLine("Repairing disrupted disk");
                    string badmachine = allmachines[allmachines.Length - 1];
                    string netpath = Surrogate.NetworkPathForHost(badmachine);
                    foreach (System.IO.FileInfo fi in (new System.IO.DirectoryInfo(netpath)).GetFiles("zd.!.*.zd"))
                    {
                        System.IO.File.Move(fi.FullName, fi.DirectoryName + @"\zd." + fi.Name.Substring(5));
                    }
                }
            }
        }
        Console.WriteLine("[PASSED] - " + string.Join(" ", args));
    }
    finally
    {
        Console.WriteLine("Restoring DFS.xml backup...");
        // Note: these are safe; the try/finally only wraps the new dfs.
        try
        {
            Exec.Shell("Qizmt del *");
        }
        catch
        {
        }
        try
        {
            // Delete temp dfs.xml, it's being replaced with the good one.
            System.IO.File.Delete(dfsxmlpath);
        }
        catch
        {
        }
        System.IO.File.Move(dfsxmlpathbackup, dfsxmlpath);
        {
            // Note: killall issued to fix disrupted machines
            Console.WriteLine("Running killall to repair");
            Exec.Shell("Qizmt killall -f");
        }
    }
}
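// Illustrative sketch (added; not part of the original source): the "bad disk"
// above is simulated by renaming data chunks from zd.*.zd to zd.!.*.zd so the
// service can't find them, then repaired by renaming them back. Both directions in
// one helper:
static void ToggleChunks(string netpath, bool breakDisk)
{
    System.IO.DirectoryInfo di = new System.IO.DirectoryInfo(netpath);
    string pattern = breakDisk ? "zd.*.zd" : "zd.!.*.zd";
    foreach (System.IO.FileInfo fi in di.GetFiles(pattern))
    {
        string newname = breakDisk
            ? (fi.Name.StartsWith("zd.!.") ? fi.Name : "zd.!." + fi.Name.Substring(3))
            : "zd." + fi.Name.Substring(5);
        if (newname != fi.Name)
        {
            System.IO.File.Move(fi.FullName, fi.DirectoryName + @"\" + newname);
        }
    }
}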
static void MetaPath(string[] args)
{
    {
        string thisservicedir = Surrogate.FetchServiceNetworkPath(System.Net.Dns.GetHostName());
        string master = Surrogate.LocateMasterHost(thisservicedir);
        Surrogate.SetNewMasterHost(master);
        Surrogate.SetNewMetaLocation(thisservicedir);
    }
    // NetworkPathForHost works here due to the above.
    string internalpath = Surrogate.NetworkPathForHost(Surrogate.MasterHost) + @"\" + dfs.DFSXMLNAME;
    Console.WriteLine("Internal DFS.xml path: {0}", internalpath);
    string dfsxmlpath;
    if (args.Length <= 1 || !System.IO.File.Exists(args[1]))
    {
        //throw new Exception("Expected path to DFS.xml");
        dfsxmlpath = null;
    }
    else
    {
        dfsxmlpath = args[1];
        Console.WriteLine("Command-line path: {0}", dfsxmlpath);
    }
    string metapath = Exec.Shell("Qizmt metapath").Trim();
    Console.WriteLine("Qizmt metapath path: {0}", metapath);
    Console.WriteLine("Comparing...");
    if (null != dfsxmlpath)
    {
        if (!System.IO.File.Exists(dfsxmlpath))
        {
            throw new Exception("Command-line path does not exist: " + dfsxmlpath);
        }
        if (!System.IO.File.Exists(metapath))
        {
            throw new Exception("metapath path does not exist: " + metapath);
        }
        if (System.IO.File.ReadAllText(dfsxmlpath) != System.IO.File.ReadAllText(metapath))
        {
            throw new Exception("metapath failure: command-line and metapath are not the same");
        }
    }
    if (!System.IO.File.Exists(internalpath))
    {
        throw new Exception("Internal path does not exist: " + internalpath);
    }
    if (!System.IO.File.Exists(metapath))
    {
        throw new Exception("metapath path does not exist: " + metapath);
    }
    if (System.IO.File.ReadAllText(internalpath) != System.IO.File.ReadAllText(metapath))
    {
        throw new Exception("metapath failure: internal path and metapath are not the same");
    }
    Console.WriteLine("[PASSED] - " + string.Join(" ", args));
}
static void RemoveSurrogate(string[] args)
{
    if (args.Length <= 1 || !System.IO.File.Exists(args[1]))
    {
        throw new Exception("Expected path to DFS.xml");
    }
    string dfsxmlpath = args[1];
    string dfsxmlpathbackup = dfsxmlpath + "$" + Guid.NewGuid().ToString();
    if (args.Length <= 2)
    {
        throw new Exception("Expected: incluster or isolated");
    }
    bool incluster;
    if (args[2] == "incluster")
    {
        incluster = true;
    }
    else if (args[2] == "isolated")
    {
        incluster = false;
    }
    else
    {
        throw new Exception("Expected: incluster or isolated");
    }
    string masterdir;
    {
        System.IO.FileInfo fi = new System.IO.FileInfo(dfsxmlpath);
        masterdir = fi.DirectoryName; // Directory's full path.
    }
    Surrogate.SetNewMetaLocation(masterdir);
    string masterslavedat = masterdir + @"\slave.dat";
    dfs olddfs = dfs.ReadDfsConfig_unlocked(dfsxmlpath);
    string masterhost = System.Net.Dns.GetHostName();
    // Non-surrogate machines, regardless of whether the surrogate participates.
    List<string> otherhosts = new List<string>();
    foreach (string slave in olddfs.Slaves.SlaveList.Split(';'))
    {
        if (0 != string.Compare(IPAddressUtil.GetName(slave), IPAddressUtil.GetName(masterhost), StringComparison.OrdinalIgnoreCase))
        {
            otherhosts.Add(slave);
        }
    }
    string newmaster = otherhosts[0];
    Console.WriteLine("Backing up DFS.xml to: {0} ...", dfsxmlpathbackup);
    try
    {
        System.IO.File.Delete(dfsxmlpathbackup);
    }
    catch
    {
    }
    System.IO.File.Move(dfsxmlpath, dfsxmlpathbackup);
    try
    {
        StringBuilder sbmachines = new StringBuilder(1000);
        if (incluster)
        {
            sbmachines.Append(masterhost);
        }
        foreach (string host in otherhosts)
        {
            if (0 != sbmachines.Length)
            {
                sbmachines.Append(',');
            }
            sbmachines.Append(host);
        }
        Console.WriteLine("Formatting DFS for test...");
        Exec.Shell("Qizmt @format Machines=" + sbmachines.ToString());
        Console.WriteLine("Adding some files to DFS...");
        Console.Write(" ");
        Exec.Shell("Qizmt bingen 1MB 1MB 50");
        Console.Write("10%");
        Exec.Shell("Qizmt examples");
        Console.Write("..15%");
        Exec.Shell("Qizmt wordgen 10MB 10MB 100");
        Console.Write("..50%");
        Exec.Shell("Qizmt asciigen 50MB 50MB 500");
        Console.Write("..100%");
        Console.WriteLine();
        int ls_output_linecount = Exec.Shell("Qizmt ls").Split('\n').Length;
        Console.WriteLine("Ensure the cluster is perfectly healthy...");
        EnsurePerfectQizmtHealtha();
        Console.WriteLine("Run test job, save output...");
        string md5_10MB_output = Exec.Shell("Qizmt md5 10MB");
        Console.WriteLine("Removing Surrogate (removemachine {0}) ...", masterhost);
        Console.WriteLine(Exec.Shell("Qizmt removemachine " + masterhost));
        Console.WriteLine("Interface with new surrogate...");
        System.IO.File.WriteAllText(masterslavedat, "master=" + newmaster + Environment.NewLine);
        {
            // Not comparing contents because of the free disk space line.
            int new_ls_output_linecount = Exec.Shell("Qizmt ls").Split('\n').Length;
            if (ls_output_linecount != new_ls_output_linecount)
            {
                throw new Exception("Cluster does not contain the same files as before removemachine "
                    + masterdir + ", or problem issuing commands on new surrogate");
            }
        }
        Console.WriteLine("Ensure the cluster is perfectly healthy...");
        EnsurePerfectQizmtHealtha();
        Console.WriteLine("Run test job, confirm output...");
        if (md5_10MB_output != Exec.Shell("Qizmt md5 10MB"))
        {
            throw new Exception("Test job output does not match previous run");
        }
        Console.WriteLine("[PASSED] - " + string.Join(" ", args));
    }
    finally
    {
        Console.WriteLine("Restoring DFS.xml backup...");
        // Note: these are safe; the try/finally only wraps the new dfs.
        try
        {
            Exec.Shell("Qizmt del *");
        }
        catch
        {
        }
        try
        {
            System.IO.File.Delete(masterslavedat);
        }
        catch
        {
        }
        for (int si = 0; si < otherhosts.Count; si++)
        {
            try
            {
                System.IO.File.Delete(Surrogate.NetworkPathForHost(otherhosts[si]) + @"\slave.dat");
                // Deleting dfs.xml should go last because it'll usually fail.
                System.IO.File.Delete(Surrogate.NetworkPathForHost(otherhosts[si]) + @"\dfs.xml");
            }
            catch
            {
            }
        }
        try
        {
            System.IO.File.Delete(dfsxmlpath);
        }
        catch
        {
        }
        try
        {
            // Reformat the cluster so stuff like slave.dat is correct...
            Exec.Shell("Qizmt @format Machines=" + string.Join(",", otherhosts.ToArray()));
        }
        catch (Exception exf)
        {
            Console.Error.WriteLine("Problem during reformat, there may be an issue with the cluster: {0}", exf);
        }
        try
        {
            // Delete the dfs.xml just written, it's being replaced with the good one.
            System.IO.File.Delete(dfsxmlpath);
        }
        catch
        {
        }
        System.IO.File.Move(dfsxmlpathbackup, dfsxmlpath);
    }
}
static void Deploy(string[] args)
{
    if (args.Length <= 1 || !System.IO.File.Exists(args[1]))
    {
        throw new Exception("Expected path to DFS.xml");
    }
    string dfsxmlpath = args[1];
    string masterdir;
    {
        System.IO.FileInfo fi = new System.IO.FileInfo(dfsxmlpath);
        masterdir = fi.DirectoryName; // Directory's full path.
    }
    Surrogate.SetNewMetaLocation(masterdir);
    dfs dc = dfs.ReadDfsConfig_unlocked(dfsxmlpath);
    string masterhost = System.Net.Dns.GetHostName();
    string[] allmachines;
    {
        string[] sl = dc.Slaves.SlaveList.Split(';');
        List<string> aml = new List<string>(sl.Length + 1);
        aml.Add(masterhost);
        foreach (string slave in sl)
        {
            if (0 != string.Compare(IPAddressUtil.GetName(slave), IPAddressUtil.GetName(masterhost), StringComparison.OrdinalIgnoreCase))
            {
                aml.Add(slave);
            }
        }
        allmachines = aml.ToArray();
    }
    {
        Console.WriteLine("Ensure cluster is perfectly healthy...");
        EnsurePerfectQizmtHealtha();
        // Run a job...
        string exec_md5;
        {
            // Generate some data to operate on.
            Exec.Shell("Qizmt gen data{AE7E8F7E-AE48-40e7-B5B2-7E07E39B46F9} " + 1048576.ToString());
            string exectempdir = @"\\" + System.Net.Dns.GetHostName()
                + @"\C$\temp\qizmt\regression_test_Deploy-" + Guid.NewGuid().ToString();
            if (!System.IO.Directory.Exists(exectempdir))
            {
                System.IO.Directory.CreateDirectory(exectempdir);
            }
            string execfp = exectempdir + @"\exec{07E2B469-80F9-4776-908F-E504A906E3B6}";
            System.IO.File.WriteAllText(execfp, (@"<?xml version=`1.0` encoding=`utf-8`?>
<SourceCode>
  <Jobs>
    <Job Name=`exec{07E2B469-80F9-4776-908F-E504A906E3B6}`>
      <IOSettings>
        <JobType>mapreduce</JobType>
        <KeyLength>100</KeyLength>
        <DFSInput>dfs://data{*}</DFSInput>
        <DFSOutput>dfs://output{A785E7D1-9017-45fe-9E07-57695192A5DC}</DFSOutput>
        <KeyMajor>8</KeyMajor>
        <OutputMethod>sorted</OutputMethod>
      </IOSettings>
      <MapReduce>
        <Map>
          <![CDATA[
          public virtual void Map(ByteSlice line, MapOutput output)
          {
              output.Add(line, ByteSlice.Prepare());
          }
          ]]>
        </Map>
        <Reduce>
          <![CDATA[
          public override void Reduce(ByteSlice key, ByteSliceList values, ReduceOutput output)
          {
              while(values.MoveNext())
              {
                  output.Add(key);
              }
          }
          ]]>
        </Reduce>
      </MapReduce>
    </Job>
  </Jobs>
</SourceCode>
").Replace('`', '"'));
            Exec.Shell("Qizmt importdir " + exectempdir);
            try
            {
                System.IO.File.Delete(execfp);
                System.IO.Directory.Delete(exectempdir);
            }
            catch
            {
            }
            Exec.Shell("Qizmt exec exec{07E2B469-80F9-4776-908F-E504A906E3B6}");
            exec_md5 = DfsSum("md5", "output{A785E7D1-9017-45fe-9E07-57695192A5DC}");
            Exec.Shell("Qizmt del output{A785E7D1-9017-45fe-9E07-57695192A5DC}");
        }
        try
        {
            const string TEMP_DLLS_PATTERN = "temp_????????-????-????-????-????????????.dll";
            // Prepare to detect leaked DLLs:
            string lmachine = allmachines[allmachines.Length - 1];
            string[] dummyleaknames = new string[] {
                TEMP_DLLS_PATTERN.Replace('?', 'x'),
                //"dummy1D48A66FD2EF41e3B6266C06D320A17D.dll",
                //"dummy1D48A66FD2EF41e3B6266C06D320A17D.exe"
            };
            try
            {
                // Delete leaked DLLs on lmachine...
                foreach (string fn in System.IO.Directory.GetFiles(Surrogate.NetworkPathForHost(lmachine), TEMP_DLLS_PATTERN))
                {
                    System.IO.File.Delete(fn);
                }
                // Delete planted files from lmachine...
                //foreach (string host in allmachines)
                {
                    string host = lmachine;
                    string netdir = Surrogate.NetworkPathForHost(host);
                    foreach (string dummyleakname in dummyleaknames)
                    {
                        try
                        {
                            System.IO.File.Delete(netdir + @"\" + dummyleakname);
                        }
                        catch
                        {
                        }
                    }
                }
                // Plant some new leaked files on surrogate...
                foreach (string dummyleakname in dummyleaknames)
                {
                    System.IO.File.WriteAllText(masterdir + @"\" + dummyleakname,
                        "Dummy file for deploy leak detector" + Environment.NewLine);
                }
            }
            catch (Exception e)
            {
                lmachine = null;
                throw new Exception("Failed to prepare for deploy leak detector", e);
            }
            {
                Console.WriteLine("Deleting critical files across cluster to ensure deploy will succeed...");
                int nfailed = 0;
                string failreason = "";
                //foreach (string host in allmachines)
                if (allmachines.Length > 1) // Important; can't delete slave.exe on surrogate or it can't deploy it.
                {
                    string host = allmachines[allmachines.Length - 1];
                    try
                    {
                        string netdir = Surrogate.NetworkPathForHost(host);
                        System.IO.File.Delete(netdir + @"\MySpace.DataMining.DistributedObjects.DistributedObjectsSlave.exe");
                    }
                    catch (Exception fe)
                    {
                        nfailed++;
                        failreason = fe.ToString();
                    }
                }
                if (nfailed > 0)
                {
                    Console.WriteLine("Warning: {0} files failed to be deleted; {1}", nfailed, failreason);
                }
            }
            try
            {
                Console.WriteLine("Deploying...");
                Exec.Shell("aelight deploy");
                System.Threading.Thread.Sleep(1000 * 5); // Wait a bit for the services to come back up.
            }
            catch (Exception e)
            {
                Console.Error.WriteLine(e.ToString());
                Console.Error.WriteLine(" WARNING: cluster may be in a bad state; may need to reinstall");
                throw;
            }
            Console.WriteLine("Ensuring deploy succeeded...");
            Console.WriteLine("(Note: if this hangs indefinitely, deploy failed and need to reinstall)");
            if (lmachine != null)
            {
                //foreach (string host in allmachines)
                {
                    string host = lmachine;
                    string netdir = Surrogate.NetworkPathForHost(host);
                    foreach (string dummyleakname in dummyleaknames)
                    {
                        {
                            string fp = netdir + @"\" + dummyleakname;
                            if (System.IO.File.Exists(fp))
                            {
                                throw new Exception("Deployed dummy/leaked file: " + fp);
                            }
                        }
                    }
                }
                {
                    string[] leaks = System.IO.Directory.GetFiles(Surrogate.NetworkPathForHost(lmachine), TEMP_DLLS_PATTERN);
                    if (leaks.Length > 0)
                    {
                        throw new Exception("Deployed leaked dll: " + leaks[0] + " (" + leaks.Length.ToString() + " in total)");
                    }
                }
                // Delete the planted dummy files from surrogate!
                foreach (string dummyleakname in dummyleaknames)
                {
                    System.IO.File.Delete(masterdir + @"\" + dummyleakname);
                }
            }
            Console.WriteLine("Ensure cluster is perfectly healthy...");
            EnsurePerfectQizmtHealtha();
            // Re-run job, confirm good...
            {
                Exec.Shell("Qizmt exec exec{07E2B469-80F9-4776-908F-E504A906E3B6}");
                string new_exec_md5 = DfsSum("md5", "output{A785E7D1-9017-45fe-9E07-57695192A5DC}");
                Exec.Shell("Qizmt del output{A785E7D1-9017-45fe-9E07-57695192A5DC}");
                if (new_exec_md5 != exec_md5)
                {
                    throw new Exception("Output files from before and after deploy do not match");
                }
            }
        }
        finally
        {
            try
            {
                Console.WriteLine("Cleaning temporary test data...");
                Exec.Shell("Qizmt del exec{07E2B469-80F9-4776-908F-E504A906E3B6}");
                Exec.Shell("Qizmt del data{AE7E8F7E-AE48-40e7-B5B2-7E07E39B46F9}");
                Exec.Shell("Qizmt del output{A785E7D1-9017-45fe-9E07-57695192A5DC}");
            }
            catch
            {
            }
        }
        Console.WriteLine("[PASSED] - " + string.Join(" ", args));
    }
}