// Opens a read stream over one DFS chunk (file node) and resets the per-part
// bookkeeping fields (curpartheadersize, curpartpos, curpartfulllength).
// When skipdfschunkheader is set and the chunk is non-empty, the DFS chunk
// header is consumed so the returned stream is positioned at the first data byte.
protected System.IO.Stream GetStreamFromDfsNode(dfs.DfsFile.FileNode node, bool skipdfschunkheader)
{
    // Reset per-part state before touching the new chunk.
    curpartheadersize = 0;
    curpartpos = 0;
    System.IO.Stream stream = _OpenStream(node);
    curpartfulllength = stream.Length;
    bool hasdata = 0 != curpartfulllength;
    if (skipdfschunkheader && hasdata)
    {
        if (null == _rb)
        {
            _rb = new byte[32];
        }
        // The first 4 bytes of a chunk hold the total header length,
        // including these 4 bytes themselves.
        _StreamReadExact(stream, _rb, 4);
        int headerlength = Entry.BytesToInt(_rb);
        if (headerlength > 4)
        {
            int remaining = headerlength - 4;
            if (remaining > _rb.Length)
            {
                // Grow the scratch buffer to fit the rest of the header.
                _rb = new byte[remaining];
            }
            // Consume the remainder of the header; its contents are discarded.
            _StreamReadExact(stream, _rb, remaining);
        }
        curpartheadersize = headerlength;
    }
    // Current position starts just past the (possibly skipped) header.
    curpartpos = curpartheadersize;
    return stream;
}
// Builds the ordered list of network paths for every replica of the given node
// and returns a DfsFileNodeStream over them. A node's Host field may list
// several replica hosts separated by ';'. The list starts at this worker's
// ReplicateStartIndex so concurrent readers spread load across replicas.
System.IO.Stream _OpenStream(dfs.DfsFile.FileNode node)
{
    string[] hosts = node.Host.Split(';');
    string[] paths = new string[hosts.Length];
    for (int i = 0; i < paths.Length; i++)
    {
        // Rotate through the replica hosts beginning at ReplicateStartIndex.
        string host = hosts[(ReplicateStartIndex + i) % hosts.Length];
        paths[i] = Surrogate.NetworkPathForHost(host) + @"\" + node.Name;
    }
    return new MySpace.DataMining.AELight.DfsFileNodeStream(
        paths, true,
        System.IO.FileMode.Open,
        System.IO.FileAccess.Read,
        System.IO.FileShare.Read,
        0x400 * 4);
}
// Opens a DFS file for reading.
//   dfsfile       - DFS file name; must be of type NORMAL or BINARY_RECT.
//   PreserveOrder - when false, the chunk (node) list is shuffled so readers
//                   spread load across machines.
//   MachineLock   - when true, acquires and holds a machine-wide named mutex
//                   (released elsewhere; presumably on Close/Dispose — confirm).
// Reads the cluster config from the surrogate (master) share to pick up retry
// settings, then resolves the file entry.
// Throws System.IO.FileNotFoundException when the file does not exist, and
// InvalidOperationException for unsupported file types.
// NOTE(review): the shuffle swaps nodes[i] with a uniformly random index over
// the whole array, which is not an unbiased Fisher-Yates shuffle — presumably
// harmless here since it is only used for load spreading.
public DfsStream(string dfsfile, bool PreserveOrder, bool MachineLock) { if (MachineLock) { this.Mutex = new System.Threading.Mutex(false, "DfsStream{24A86864-EED6-4680-AB0E-3BDE97262339}"); this.Mutex.WaitOne(); } ReplicateStartIndex = StaticGlobals.Qizmt_BlockID; surrogatedir = Surrogate.NetworkPathForHost(Surrogate.MasterHost); dfs dc = dfs.ReadDfsConfig_unlocked(surrogatedir + @"\" + dfs.DFSXMLNAME); this.RetryTimeout = dc.slave.CookTimeout; this.RetryCount = dc.slave.CookRetries; dfs.DfsFile df = dc.FindAny(dfsfile); if (null == df) { throw new System.IO.FileNotFoundException("DFS file '" + dfsfile + "' not found", dfsfile); } if (0 != string.Compare(DfsFileTypes.NORMAL, df.Type, StringComparison.OrdinalIgnoreCase) && 0 != string.Compare(DfsFileTypes.BINARY_RECT, df.Type, StringComparison.OrdinalIgnoreCase)) { throw new InvalidOperationException("DFS file '" + df.Name + "' cannot be opened because file is of type " + df.Type); } this.reclen = df.RecordLength; nodes = df.Nodes.ToArray(); if (!PreserveOrder) { Random rnd = new Random(unchecked ( System.Threading.Thread.CurrentThread.ManagedThreadId + DateTime.Now.Millisecond * 351 + ReplicateStartIndex + nodes.Length * 6131)); for (int i = 0; i < nodes.Length; i++) { int ridx = rnd.Next(0, nodes.Length); dfs.DfsFile.FileNode tmpnode = nodes[i]; nodes[i] = nodes[ridx]; nodes[ridx] = tmpnode; } } }
// Runs one "remote" job: fans work out to the slave machines (one RemoteBlockInfo
// per IOSettings.DFS_IOs entry), waits for every block thread, then commits the
// output chunks into the DFS config under temporary ".$<name>.$replicating-<guid>"
// names, runs the replication phase, and finally renames the replicated entries
// to their real names. Parameters:
//   cfgj               - parsed job configuration; DFS_IOs drive the block fan-out.
//   ExecArgs           - user arguments forwarded to each remote block.
//   verbose            - write progress/summary lines to the console.
//   verbosereplication - verbose output during ReplicationPhase.
// Throws on cluster-config problems (empty SlaveList, too few healthy machines);
// per-block data errors write to stderr, call SetFailure() and return early.
public static void ExecOneRemote(SourceCode.Job cfgj, string[] ExecArgs, bool verbose, bool verbosereplication) { if (verbose) { Console.WriteLine("[{0}] [Remote: {2}]", System.DateTime.Now.ToString(), System.DateTime.Now.Millisecond, cfgj.NarrativeName); } string logname = Surrogate.SafeTextPath(cfgj.NarrativeName) + "_" + Guid.NewGuid().ToString() + ".j" + sjid + "_log.txt"; //System.Threading.Thread.Sleep(8000); /*if (cfgj.IOSettings.DFS_IOs == null || cfgj.IOSettings.DFS_IOs.Length == 0) * { * Console.Error.WriteLine("One or more IOSettings/DFS_IO needed in configuration for 'remote'"); * return; * }*/ // Could provide BlockID here, which is just the n-th DFS_IO entry. //cfgj.Remote dfs dc = LoadDfsConfig(); string[] slaves = dc.Slaves.SlaveList.Split(',', ';'); if (dc.Slaves.SlaveList.Length == 0 || slaves.Length < 1) { throw new Exception("SlaveList expected in " + dfs.DFSXMLNAME); } if (dc.Replication > 1) { string[] slavesbefore = slaves; slaves = ExcludeUnhealthySlaveMachines(slaves, true).ToArray(); if (slavesbefore.Length - slaves.Length >= dc.Replication) { throw new Exception("Not enough healthy machines to run job (hit replication count)"); } } if (cfgj.IOSettings.DFS_IO_Multis != null) { cfgj.ExpandDFSIOMultis(slaves.Length, MySpace.DataMining.DistributedObjects.MemoryUtils.NumberOfProcessors); } Dictionary <string, int> slaveIDs = new Dictionary <string, int>(); for (int si = 0; si < slaves.Length; si++) { slaveIDs.Add(slaves[si].ToUpper(), si); } bool aborting = false; try { List <RemoteBlockInfo> blocks = new List <RemoteBlockInfo>(cfgj.IOSettings.DFS_IOs.Length); if (verbose) { Console.WriteLine("{0} processes on {1} machines:", cfgj.IOSettings.DFS_IOs.Length, slaves.Length); } List <string> outputdfsdirs = new List <string>(slaves.Length); { for (int i = 0; i < slaves.Length; i++) { try { outputdfsdirs.Add(NetworkPathForHost(slaves[i])); } catch (Exception e) { Console.Error.WriteLine(" {0}", e.Message); } } } string slaveconfigxml = ""; { 
// Serialize the dfs config, extract the <slave> element, and push it to every
// slave as slaveconfig.j<jid>.xml, each write guarded by a per-slave named mutex.
// Then assign each block a slave host: either round-robin from a random start,
// or the explicit DFS_IO/Host — mixing the two styles is an error.
System.Xml.XmlDocument pdoc = new System.Xml.XmlDocument(); { System.IO.MemoryStream ms = new System.IO.MemoryStream(); System.Xml.Serialization.XmlSerializer xs = new System.Xml.Serialization.XmlSerializer(typeof(dfs)); xs.Serialize(ms, dc); ms.Seek(0, System.IO.SeekOrigin.Begin); pdoc.Load(ms); } string xml = pdoc.DocumentElement.SelectSingleNode("./slave").OuterXml; //System.Threading.Thread.Sleep(8000); slaveconfigxml = xml; } { // Temporary: for (int si = 0; si < slaves.Length; si++) { System.Threading.Mutex m = new System.Threading.Mutex(false, "AEL_SC_" + slaves[si]); try { m.WaitOne(); } catch (System.Threading.AbandonedMutexException) { } try { System.IO.File.WriteAllText(NetworkPathForHost(slaves[si]) + @"\slaveconfig.j" + sjid + ".xml", slaveconfigxml); } catch { } finally { m.ReleaseMutex(); m.Close(); } } } int nextslave = (new Random(DateTime.Now.Millisecond / 2 + System.Diagnostics.Process.GetCurrentProcess().Id / 2)).Next() % slaves.Length; int hosttypes = 0; List <int> outputrecordlengths = new List <int>(); List <int> inputrecordlengths = new List <int>(); for (int BlockID = 0; BlockID < cfgj.IOSettings.DFS_IOs.Length; BlockID++) { int slaveHostID = 0; RemoteBlockInfo bi = new RemoteBlockInfo(); bi.sampledist = dc.DataNodeBaseSize / dc.DataNodeSamples; bi.BlockID = BlockID; bi.blockcount = cfgj.IOSettings.DFS_IOs.Length; if (string.IsNullOrEmpty(cfgj.IOSettings.DFS_IOs[BlockID].Host)) { if (0 != hosttypes && 1 != hosttypes) { throw new Exception("DFS_IO/Host tag must be specified for all or none"); } hosttypes = 1; bi.SlaveHost = slaves[nextslave]; slaveHostID = nextslave; bi.explicithost = false; } else { if (0 != hosttypes && 2 != hosttypes) { throw new Exception("DFS_IO/Host tag must be specified for all or none"); } hosttypes = 2; bi.SlaveHost = cfgj.IOSettings.DFS_IOs[BlockID].Host; slaveHostID = slaveIDs[bi.SlaveHost.ToUpper()]; bi.explicithost = true; } bi.ExecArgs = ExecArgs; if (++nextslave >= slaves.Length) { nextslave = 0; } bi.logname 
// Parse the block's semicolon-separated DFSWriter list: each entry may carry an
// "@recordlen" suffix and a "dfs://" prefix; bad names and already-existing
// output files abort the job. Then configure the block's Remote object
// (retries, timeouts, sampling, compression, compiler settings).
= logname; bi.outputdfsdirs = outputdfsdirs; bi.slaves = slaves; bi.baseoutputfilesize = dc.DataNodeBaseSize; bi.cfgj = cfgj; bi.DFSWriter = cfgj.IOSettings.DFS_IOs[BlockID].DFSWriter.Trim(); bi.Meta = cfgj.IOSettings.DFS_IOs[BlockID].Meta; List <string> dfswriters = new List <string>(); if (bi.DFSWriter.Length > 0) { string[] writers = bi.DFSWriter.Split(';'); for (int wi = 0; wi < writers.Length; wi++) { string thiswriter = writers[wi].Trim(); if (thiswriter.Length == 0) { continue; } int ic = thiswriter.IndexOf('@'); int reclen = -1; if (-1 != ic) { try { reclen = Surrogate.GetRecordSize(thiswriter.Substring(ic + 1)); thiswriter = thiswriter.Substring(0, ic); } catch (FormatException e) { Console.Error.WriteLine("Error: remote output record length error: {0} ({1})", thiswriter, e.Message); SetFailure(); return; } catch (OverflowException e) { Console.Error.WriteLine("Error: remote output record length error: {0} ({1})", thiswriter, e.Message); SetFailure(); return; } } string outfn = thiswriter; if (outfn.StartsWith(@"dfs://", StringComparison.OrdinalIgnoreCase)) { outfn = outfn.Substring(6); } string reason = ""; if (dfs.IsBadFilename(outfn, out reason)) { Console.Error.WriteLine("Invalid output file: {0}", reason); return; } if (null != DfsFindAny(dc, outfn)) { Console.Error.WriteLine("Error: output file already exists in DFS: {0}", outfn); return; } dfswriters.Add(thiswriter); outputrecordlengths.Add(reclen); } } else { dfswriters.Add(""); outputrecordlengths.Add(-1); } bi.DFSWriters = dfswriters; bi.verbose = verbose; bi.rem = new MySpace.DataMining.DistributedObjects5.Remote(cfgj.NarrativeName + "_remote"); bi.rem.CookRetries = dc.slave.CookRetries; bi.rem.CookTimeout = dc.slave.CookTimeout; bi.rem.DfsSampleDistance = bi.sampledist; bi.rem.CompressFileOutput = dc.slave.CompressDfsChunks; bi.rem.LocalCompile = true; bi.rem.OutputStartingPoint = slaveHostID; bi.rem.CompilerOptions = cfgj.IOSettings.CompilerOptions; bi.rem.CompilerVersion = 
// Attach optional extensions (OpenCV, MemCache, unsafe), then resolve the
// block's DFS input files: "@recordlen" suffix parsing, BINARY_RECT record-length
// validation, and collection of every input file's chunk nodes (with per-file
// offsets into the combined node list).
cfgj.IOSettings.CompilerVersion; if (cfgj.AssemblyReferencesCount > 0) { cfgj.AddAssemblyReferences(bi.rem.CompilerAssemblyReferences, Surrogate.NetworkPathForHost(dc.Slaves.GetFirstSlave())); } if (cfgj.OpenCVExtension != null) { bi.rem.AddOpenCVExtension(); } if (cfgj.MemCache != null) { bi.rem.AddMemCacheExtension(); } if (cfgj.Unsafe != null) { bi.rem.AddUnsafe(); } { List <dfs.DfsFile.FileNode> nodes = new List <dfs.DfsFile.FileNode>(); List <string> mapfileswithnodes = null; List <int> nodesoffsets = null; IList <string> mapfiles = SplitInputPaths(dc, cfgj.IOSettings.DFS_IOs[BlockID].DFSReader); if (mapfiles.Count > 0) { mapfileswithnodes = new List <string>(mapfiles.Count); nodesoffsets = new List <int>(mapfiles.Count); } for (int i = 0; i < mapfiles.Count; i++) { string dp = mapfiles[i].Trim(); int inreclen = -1; if (0 != dp.Length) // Allow empty entry where input isn't wanted. { if (dp.StartsWith("dfs://", StringComparison.OrdinalIgnoreCase)) { dp = dp.Substring(6); } { int ic = dp.IndexOf('@'); if (-1 != ic) { try { inreclen = Surrogate.GetRecordSize(dp.Substring(ic + 1)); dp = dp.Substring(0, ic); } catch (FormatException e) { Console.Error.WriteLine("Error: remote input record length error: {0} ({1})", dp, e.Message); SetFailure(); return; } catch (OverflowException e) { Console.Error.WriteLine("Error: remote input record length error: {0} ({1})", dp, e.Message); SetFailure(); return; } } } dfs.DfsFile df; if (inreclen > 0 || inreclen == -2) { df = DfsFind(dc, dp, DfsFileTypes.BINARY_RECT); if (null != df && inreclen != df.RecordLength) { Console.Error.WriteLine("Error: remote input file does not have expected record length of {0}: {1}@{2}", inreclen, dp, df.RecordLength); SetFailure(); return; } } else { df = DfsFind(dc, dp); } if (null == df) { //throw new Exception("Remote input file not found in DFS: " + dp); Console.Error.WriteLine("Remote input file not found in DFS: {0}", dp); return; } if (df.Nodes.Count > 0) { mapfileswithnodes.Add(dp); 
// Map the collected input nodes to network "star" paths, create one thread per
// block, publish the first input/output record lengths via StaticGlobals, then
// start all block threads (started separately because StaticGlobals must be set
// first) and join them, reporting any block that failed.
nodesoffsets.Add(nodes.Count); inputrecordlengths.Add(inreclen); nodes.AddRange(df.Nodes); } } } bi.dfsinputpaths = new List <string>(nodes.Count); //MapNodesToNetworkPaths(nodes, bi.dfsinputpaths); dfs.MapNodesToNetworkStarPaths(nodes, bi.dfsinputpaths); bi.dfsinputfilenames = mapfileswithnodes; bi.dfsinputnodesoffsets = nodesoffsets; } blocks.Add(bi); bi.thread = new System.Threading.Thread(new System.Threading.ThreadStart(bi.threadproc)); bi.thread.Name = "RemoteJobBlock" + bi.BlockID; } MySpace.DataMining.DistributedObjects.StaticGlobals.DSpace_InputRecordLength = inputrecordlengths.Count > 0 ? inputrecordlengths[0] : -1; MySpace.DataMining.DistributedObjects.StaticGlobals.DSpace_OutputRecordLength = outputrecordlengths.Count > 0 ? outputrecordlengths[0] : -1; // Need to start threads separately due to StaticGlobals being updated. for (int BlockID = 0; BlockID < cfgj.IOSettings.DFS_IOs.Length; BlockID++) { RemoteBlockInfo bi = blocks[BlockID]; bi.rem.InputRecordLength = MySpace.DataMining.DistributedObjects.StaticGlobals.DSpace_InputRecordLength; bi.rem.InputRecordLengths = inputrecordlengths; bi.rem.OutputRecordLength = MySpace.DataMining.DistributedObjects.StaticGlobals.DSpace_OutputRecordLength; bi.rem.OutputRecordLengths = outputrecordlengths; AELight_StartTraceThread(bi.thread); } for (int BlockID = 0; BlockID < blocks.Count; BlockID++) { AELight_JoinTraceThread(blocks[BlockID].thread); blocks[BlockID].rem.Close(); if (blocks[BlockID].blockfail) { Console.Error.WriteLine("BlockID {0} on host '{1}' did not complete successfully", BlockID, (blocks[BlockID].SlaveHost != null) ? blocks[BlockID].SlaveHost : "<null>"); continue; } } List <string> dfsnames = new List <string>(); List <string> dfsnamesreplicating = new List <string>(); // Reload DFS config to make sure changes since starting get rolled in, and make sure the output file wasn't created in that time... using (LockDfsMutex()) // Needed: change between load & save should be atomic. 
// Under the DFS mutex: reload the config and register each successful block's
// output chunks as new DFS files under temporary replicating names, verifying
// no identically-named file appeared while the job ran.
{ dc = LoadDfsConfig(); for (int BlockID = 0; BlockID < blocks.Count; BlockID++) { if (blocks[BlockID].blockfail) { continue; } { bool anyoutput = false; bool nonemptyoutputpath = false; for (int oi = 0; oi < blocks[BlockID].DFSWriters.Count; oi++) { string dfswriter = blocks[BlockID].DFSWriters[oi]; if (string.IsNullOrEmpty(dfswriter)) { if (blocks[BlockID].outputdfsnodeses[oi].Count > 0) { Console.Error.WriteLine("Output data detected with no DFSWriter specified"); } } else { { if (null != DfsFind(dc, dfswriter)) { Console.Error.WriteLine("Error: output file was created during job: {0}", dfswriter); continue; } string dfspath = dfswriter; { nonemptyoutputpath = true; dfs.DfsFile df = new dfs.DfsFile(); if (blocks[BlockID].rem.OutputRecordLengths[oi] > 0) { df.XFileType = DfsFileTypes.BINARY_RECT + "@" + blocks[BlockID].rem.OutputRecordLengths[oi].ToString(); } else if (blocks[BlockID].rem.OutputRecordLengths[oi] == -2) { df.XFileType = DfsFileTypes.BINARY_RECT + "@?"; } df.Nodes = new List <dfs.DfsFile.FileNode>(); df.Size = -1; // Preset if (dfspath.StartsWith("dfs://", StringComparison.OrdinalIgnoreCase)) { dfspath = dfspath.Substring(6); } string dfspathreplicating = ".$" + dfspath + ".$replicating-" + Guid.NewGuid().ToString(); if (null != dc.FindAny(dfspathreplicating)) { Console.Error.WriteLine("Error: file exists: file put into DFS from another location during job: " + dfspathreplicating); SetFailure(); return; } dfsnames.Add(dfspath); dfsnamesreplicating.Add(dfspathreplicating); df.Name = dfspathreplicating; bool anybad = false; long totalsize = 0; { int i = BlockID; for (int j = 0; j < blocks[i].outputdfsnodeses[oi].Count; j++) { dfs.DfsFile.FileNode fn = new dfs.DfsFile.FileNode(); fn.Host = blocks[i].slaves[(blocks[i].rem.OutputStartingPoint + j) % blocks[i].slaves.Count]; fn.Name = blocks[i].outputdfsnodeses[oi][j]; df.Nodes.Add(fn); fn.Length = -1; // Preset fn.Position = -1; // Preset if (anybad) { continue; } fn.Length = 
// Record each chunk's length and byte position plus the file's total size;
// the file is added to DFS even if empty. Then run ReplicationPhase and, again
// under the mutex, rename the replicating entries to their final names.
blocks[i].outputsizeses[oi][j]; fn.Position = totalsize; // Position must be set before totalsize updated! if (blocks[i].outputdfsnodeses[oi].Count != blocks[i].outputsizeses[oi].Count) { anybad = true; continue; } totalsize += blocks[i].outputsizeses[oi][j]; } } if (!anybad) { df.Size = totalsize; } if (totalsize != 0) { anyoutput = true; } // Always add the file to DFS, even if blank! dc.Files.Add(df); } } } } if (!anyoutput && verbose && nonemptyoutputpath) { Console.Write(" (no DFS output) "); ConsoleFlush(); } } } UpdateDfsXml(dc); } ReplicationPhase(verbosereplication, blocks.Count, slaves, dfsnamesreplicating); using (LockDfsMutex()) // Needed: change between load & save should be atomic. { dc = LoadDfsConfig(); // Reload in case of change or user modifications. for (int nfile = 0; nfile < dfsnames.Count; nfile++) { string dfspath = dfsnames[nfile]; string dfspathreplicating = dfsnamesreplicating[nfile]; { dfs.DfsFile dfu = dc.FindAny(dfspathreplicating); if (null != dfu) { if (null != DfsFindAny(dc, dfspath)) { Console.Error.WriteLine("Error: file exists: file put into DFS from another location during job"); SetFailure(); continue; } dfu.Name = dfspath; } } } UpdateDfsXml(dc); } if (verbose) { Console.WriteLine(); // Line after output chars. 
// Cleanup: always delete the pushed slaveconfig files (per-slave mutex again);
// check user logs unless the thread is aborting; print the output summary.
} } catch (System.Threading.ThreadAbortException) { aborting = true; } finally { { for (int si = 0; si < slaves.Length; si++) { System.Threading.Mutex m = new System.Threading.Mutex(false, "AEL_SC_" + slaves[si]); try { m.WaitOne(); } catch (System.Threading.AbandonedMutexException) { } try { System.IO.File.Delete(NetworkPathForHost(slaves[si]) + @"\slaveconfig.j" + sjid + ".xml"); } catch { } finally { m.ReleaseMutex(); m.Close(); } } } if (!aborting) { CheckUserLogs(slaves, logname); } } if (verbose) { Console.WriteLine(); Console.WriteLine("[{0}] Done", System.DateTime.Now.ToString(), System.DateTime.Now.Millisecond); for (int i = 0; i < cfgj.IOSettings.DFS_IOs.Length; i++) { Console.WriteLine("Output: {0}", cfgj.IOSettings.DFS_IOs[i].DFSWriter); } } }
// Registers pre-existing chunk files as a new DFS file without copying any data.
//   args[0] - path to a text file with one "<host> <chunkname> <size>" line per
//             chunk; <size> must exclude the chunk header size.
//   args[1] - the DFS name to publish.
//   args[2] - file type; must start with BINARY_RECT (types without samples).
// In DEBUG builds each line is validated (single host, chunk exists on the
// host's share, non-negative size). The file entry is first added under a
// temporary ".$<name>.$replicating-<guid>" name, ReplicationPhase runs, and the
// entry is renamed to its final name under the DFS mutex.
// NOTE(review): fn.Position is set to the part index (ipi) here, not a byte
// offset as in DfsPut/DfsPutBinary — confirm consumers of bulk-put files do not
// rely on byte positions.
public static void DfsBulkPut(string[] args) { string inputfilepath = args[0]; // Lines of "<host> <chunkname> <size>" but <size> must exclude the size of the header. string newprettyfilename = args[1]; string filetype = args[2]; // Can probably only support file types that don't have samples. if (!filetype.StartsWith(DfsFileTypes.BINARY_RECT, true, null)) { throw new Exception("DfsBulkPut: must be of type " + DfsFileTypes.BINARY_RECT); } List<string> partsinfo = new List<string>(); using (System.IO.StreamReader sr = System.IO.File.OpenText(inputfilepath)) { for (; ; ) { string pn = sr.ReadLine(); if (null == pn) { break; } if (0 != pn.Length) { partsinfo.Add(pn); #if DEBUG { string[] x = pn.Split(' '); string xhost = x[0]; string xchunkname = x[1]; long xchunksize = long.Parse(x[2]); if (-1 != xhost.IndexOf(';')) { throw new Exception("DEBUG: cannot specify multiple hosts: " + xhost); } if(!System.IO.File.Exists(Surrogate.NetworkPathForHost(xhost) + @"\" + xchunkname)) { throw new Exception("DEBUG: (!System.IO.File.Exists(\"" + Surrogate.NetworkPathForHost(xhost) + @"\" + xchunkname + "\"))"); } if (xchunksize < 0) { throw new Exception("DEBUG: chunk size is negative: " + xchunksize.ToString()); } } #endif } } } string[] slaves; { dfs.DfsFile df = new dfs.DfsFile(); df.Nodes = new List<dfs.DfsFile.FileNode>(partsinfo.Count); df.Name = ".$" + newprettyfilename + ".$replicating-" + Guid.NewGuid().ToString(); df.Type = filetype; using (LockDfsMutex()) { dfs dc = LoadDfsConfig(); if (null != dc.FindAny(newprettyfilename)) { Console.Error.WriteLine("Output file already exists: " + newprettyfilename); SetFailure(); return; } { slaves = dc.Slaves.SlaveList.Split(',', ';'); if (null == dc.Slaves.SlaveList || dc.Slaves.SlaveList.Length == 0 || slaves.Length < 1) { Console.Error.WriteLine("SlaveList expected in configuration (no machines)"); SetFailure(); return; } if (dc.Replication > 1) { slaves = ExcludeUnhealthySlaveMachines(slaves, true).ToArray(); } if (0 == 
// Build a FileNode per parts line (Position = part index, Length = given size,
// total size accumulated with overflow checking), commit the entry under the
// replicating name, replicate, then rename to the final name under the mutex.
slaves.Length) { Console.Error.WriteLine("No healthy machines for DFS put"); SetFailure(); return; } } checked { long TotalSize = 0; for (int ipi = 0; ipi < partsinfo.Count; ipi++) { string[] x = partsinfo[ipi].Split(' '); string xhost = x[0]; string xchunkname = x[1]; long xchunksize = long.Parse(x[2]); dfs.DfsFile.FileNode fn = new dfs.DfsFile.FileNode(); fn.Position = ipi; fn.Host = xhost; fn.Name = xchunkname; fn.Length = xchunksize; TotalSize += xchunksize; df.Nodes.Add(fn); } df.Size = TotalSize; } dc.Files.Add(df); UpdateDfsXml(dc); } ReplicationPhase(df.Name, false, 0, slaves); using (LockDfsMutex()) { dfs dc = LoadDfsConfig(); // Reload in case of intermediate change. dfs.DfsFile dfu = dc.FindAny(df.Name); if (null != dfu) { if (null != DfsFindAny(dc, newprettyfilename)) { Console.Error.WriteLine("Output file already exists: " + newprettyfilename); SetFailure(); return; } dfu.Name = newprettyfilename; UpdateDfsXml(dc); } } } }
// "putbinary": packs local files matching <wildcard> into DFS data-node chunks.
//   args[0] - local path containing a directory and a wildcard (dir\pattern).
//   args[1] - target DFS path (optional "dfs://" prefix is stripped).
// Each file is stored as: its name padded/truncated to Blob.padSize bytes, the
// base64-encoded file contents, and a trailing newline. Every chunk begins with
// a 12-byte header (4-byte header size + 8-byte absolute start position).
// Sample (.zsa) files are written alongside each chunk. Files whose base64 size
// would exceed the per-chunk limit are skipped with an error. The DFS entry is
// committed under a temporary replicating name, replicated, then renamed.
static void DfsPutBinary(string[] args) { if (!dfs.DfsConfigExists(DFSXMLPATH)) { Console.Error.WriteLine("DFS not setup; use: {0} format", appname); SetFailure(); return; } if (args.Length < 2) { Console.Error.WriteLine("putbinary error: {0} putbinary <wildcard> <dfsname>", appname); SetFailure(); return; } string localpath = args[0]; int del = localpath.LastIndexOf(@"\"); string dir = localpath.Substring(0, del); if (!System.IO.Directory.Exists(dir)) { Console.Error.WriteLine("Directory not found: {0}", dir); SetFailure(); return; } string wildcard = localpath.Substring(del + 1); System.IO.DirectoryInfo di = new System.IO.DirectoryInfo(dir); System.IO.FileInfo[] files = di.GetFiles(wildcard); if (files.Length == 0) { Console.Error.WriteLine("No files found in directory {0} matching wildcard {1}", dir, wildcard); SetFailure(); return; } string dfspath = args[1]; if (dfspath.StartsWith("dfs://", StringComparison.OrdinalIgnoreCase)) { dfspath = dfspath.Substring(6); } string reason = ""; if (dfs.IsBadFilename(dfspath, out reason)) { Console.Error.WriteLine("Invalid dfspath: {0}", reason); SetFailure(); return; } dfs dc = LoadDfsConfig(); for (int i = 0; i < dc.Files.Count; i++) { if (0 == string.Compare(dc.Files[i].Name, dfspath, true)) { Console.Error.WriteLine("Error: The specified file already exists in DFS: {0}", dfspath); SetFailure(); return; } } string[] slaves = dc.Slaves.SlaveList.Split(',', ';'); if (null == dc.Slaves.SlaveList || dc.Slaves.SlaveList.Length == 0 || slaves.Length < 1) { Console.Error.WriteLine("SlaveList expected in configuration (no machines)"); SetFailure(); return; } if (dc.Replication > 1) { slaves = ExcludeUnhealthySlaveMachines(slaves, true).ToArray(); } if (0 == slaves.Length) { Console.Error.WriteLine("No healthy machines for DFS putbinary"); SetFailure(); return; } int blobPadSize = MySpace.DataMining.DistributedObjects.Blob.padSize; string nl = Environment.NewLine; byte[] nbuf = System.Text.Encoding.UTF8.GetBytes(nl); long 
// Per-file base64 size limit within a chunk; outer loop creates one chunk per
// round-robin slave and packs as many whole files as fit (chunkremain bytes).
MAX_IMAGE_SIZE = dc.DataNodeBaseSize - blobPadSize * 4 - nbuf.Length; string sMAX_IMAGE_SIZE = AELight.GetFriendlyByteSize(MAX_IMAGE_SIZE); const int MAX_SIZE_PER_RECEIVE = 0x400 * 21 * 3; long sampledist = dc.DataNodeBaseSize / dc.DataNodeSamples; Random rnd = new Random((DateTime.Now.Millisecond / 2) + (System.Diagnostics.Process.GetCurrentProcess().Id / 2)); List<dfs.DfsFile.FileNode> ninfos = new List<dfs.DfsFile.FileNode>(64); int nextslave = rnd.Next() % slaves.Length; long curbytepos = 0; long nextsamplepos = 0; byte[] fbuf = new byte[MAX_SIZE_PER_RECEIVE]; byte[] sbuf = new byte[(MAX_SIZE_PER_RECEIVE / 3) * 4]; int fi = 0; while(fi < files.Length) { string SlaveHost = slaves[nextslave]; if (++nextslave >= slaves.Length) { nextslave = 0; } string netdir = NetworkPathForHost(SlaveHost); string chunkname = GenerateZdFileDataNodeName(dfspath); string chunkpath = netdir + @"\" + chunkname; string samplepath = netdir + @"\" + chunkname + ".zsa"; long chunkremain = dc.DataNodeBaseSize; long chunkpos = curbytepos; System.IO.FileStream fc = new System.IO.FileStream(chunkpath, System.IO.FileMode.CreateNew, System.IO.FileAccess.Write, System.IO.FileShare.None, FILE_BUFFER_SIZE); System.IO.FileStream fsa = new System.IO.FileStream(samplepath, System.IO.FileMode.Create, System.IO.FileAccess.Write, System.IO.FileShare.None); MySpace.DataMining.DistributedObjects.Entry.ToBytes(4 + 8, fbuf, 0); // Size of header. MySpace.DataMining.DistributedObjects.Entry.LongToBytes(chunkpos, fbuf, 4); fc.Write(fbuf, 0, 4 + 8); while (fi < files.Length) { System.IO.FileInfo file = files[fi]; long b64Len = file.Length / 3L; if (b64Len * 3L < file.Length) { b64Len++; } b64Len *= 4; if (b64Len >= MAX_IMAGE_SIZE) { Console.Error.WriteLine("Cannot put file. 
// Write the padded filename, then stream the file through base64 into the
// chunk; a newline separates records. FileNode records Host/Position/Length.
// NOTE(review): chunkpos is never advanced inside this write loop, so the
// "chunkpos >= nextsamplepos" sample check compares against the chunk's
// starting offset only — compare with DfsPut, which does chunkpos += xread;
// confirm whether the .zsa sampling here is intentional.
File exceeds size limit of {0}: {1}", sMAX_IMAGE_SIZE, file.Name); fi++; continue; } if (b64Len > chunkremain) { break; } System.IO.FileStream fs = new System.IO.FileStream(file.FullName, System.IO.FileMode.Open, System.IO.FileAccess.Read, System.IO.FileShare.Read); { string fname = file.Name; if (fname.Length < blobPadSize) { fname = fname.PadRight(blobPadSize); } else if (fname.Length > blobPadSize) { fname = fname.Substring(0, blobPadSize); } int bc = System.Text.Encoding.UTF8.GetBytes(fname, 0, fname.Length, fbuf, 0); fc.Write(fbuf, 0, bc); chunkremain -= bc; curbytepos += bc; } int xread = 0; for (; ; ) { xread = fs.Read(fbuf, 0, MAX_SIZE_PER_RECEIVE); if (xread <= 0) { break; } string s = Convert.ToBase64String(fbuf, 0, xread); int bc = System.Text.Encoding.UTF8.GetBytes(s, 0, s.Length, sbuf, 0); fc.Write(sbuf, 0, bc); chunkremain -= bc; curbytepos += bc; if (chunkpos >= nextsamplepos) { fsa.Write(sbuf, 0, bc); nextsamplepos += sampledist; } } { fc.Write(nbuf, 0, nbuf.Length); chunkremain -= nbuf.Length; curbytepos += nbuf.Length; } fs.Close(); fi++; } fc.Close(); fsa.Close(); { dfs.DfsFile.FileNode fnode = new dfs.DfsFile.FileNode(); fnode.Host = SlaveHost; fnode.Position = chunkpos; fnode.Length = curbytepos - chunkpos; fnode.Name = chunkname; ninfos.Add(fnode); } } string dfspathreplicating = ".$" + dfspath + ".$replicating-" + Guid.NewGuid().ToString(); using (LockDfsMutex()) // Needed: change between load & save should be atomic. { dc = LoadDfsConfig(); // Reload in case of changes during put. 
// Commit the entry under the temporary replicating name (re-checking for a
// concurrent put), replicate, then rename to the final DFS name under the mutex.
if (null != dc.FindAny(dfspathreplicating)) { Console.Error.WriteLine("Error: file exists: file put into DFS from another location during put: " + dfspathreplicating); SetFailure(); return; } dfs.DfsFile dfsfile = new dfs.DfsFile(); dfsfile.Nodes = ninfos; dfsfile.Name = dfspathreplicating; dfsfile.Size = curbytepos; dc.Files.Add(dfsfile); UpdateDfsXml(dc); } ReplicationPhase(dfspathreplicating, true, 0, slaves); using (LockDfsMutex()) // Needed: change between load & save should be atomic. { dc = LoadDfsConfig(); // Reload in case of changes during put. dfs.DfsFile dfu = dc.FindAny(dfspathreplicating); if (null != dfu) { if (null != DfsFindAny(dc, dfspath)) { Console.Error.WriteLine("Error: file exists: file put into DFS from another location during put"); SetFailure(); return; } dfu.Name = dfspath; UpdateDfsXml(dc); } } Console.WriteLine("Sent {0} bytes to file dfs://{1}", curbytepos, dfspath); }
// "dfs put": copies a local file into DFS, splitting it into chunks of at most
// DataNodeBaseSize bytes. Supports: "-rv" flag (replication debug verbosity),
// .gz input (decompressed on the fly), an "@recordlen" suffix on the DFS path
// for rectangular (fixed-record) files, and gzip-compressed output chunks when
// dc.slave.CompressDfsChunks == 1. Text input is split on '\n' boundaries and
// sampled into .zsa files; rectangular input copies whole records, no samples.
// A UTF-8 BOM at the start of text input is stripped. ".dll" inputs take a
// special path: copied to every slave's DLL dir and registered by name/size only.
//   maxLineSize - buffer size; text lines longer than this are truncated
//                 (bytes beyond the buffer are read but not written — see note below).
static void DfsPut(string[] args, long maxLineSize) { if (args.Length > 0 && "-rv" == args[0]) { args = SubArray(args, 1); ReplicationDebugVerbose = true; } if (!dfs.DfsConfigExists(DFSXMLPATH)) { Console.Error.WriteLine("DFS not setup; use: {0} format", appname); SetFailure(); return; } if (args.Length < 1) { Console.Error.WriteLine("dfs put error: {0} dfs put <localpath> [<dfspath>]", appname); SetFailure(); return; } { string localpath = args[0]; if (!System.IO.File.Exists(localpath)) { Console.Error.WriteLine("File not found: {0}", localpath); SetFailure(); return; } string dfspath; if (args.Length > 1) { dfspath = args[1]; if (dfspath.StartsWith("dfs://", StringComparison.OrdinalIgnoreCase)) { dfspath = dfspath.Substring(6); } } else { dfspath = (new System.IO.FileInfo(localpath)).Name; } int RecordLength = -1; { int ic = dfspath.IndexOf('@'); if (-1 != ic) { try { RecordLength = Surrogate.GetRecordSize(dfspath.Substring(ic + 1)); dfspath = dfspath.Substring(0, ic); } catch (FormatException e) { Console.Error.WriteLine("Invalid Record Length or DFS path: {0}", e.Message); SetFailure(); return; } catch (OverflowException e) { Console.Error.WriteLine("Invalid Record Length or DFS path: {0}", e.Message); SetFailure(); return; } } } { string reason = ""; if (dfs.IsBadFilename(dfspath, out reason)) { Console.Error.WriteLine("Invalid DFS path: {0}", reason); SetFailure(); return; } } EnsureNetworkPath(localpath); { dfs dc = LoadDfsConfig(); if (localpath.EndsWith(".dll", StringComparison.OrdinalIgnoreCase)) { if (-1 != dfspath.IndexOf(@".\") || -1 != dfspath.IndexOf(@"./")) { // Prevent navigating directories. Console.Error.WriteLine("Invalid DFS name for DLL"); SetFailure(); return; } System.IO.FileInfo dllfi = new System.IO.FileInfo(localpath); dc.Find(dfspath, DfsFileTypes.DLL); // Error if not dll, otherwise fine to replace. 
// DLL path: copy the dll to every slave's DLL cache dir in parallel, then
// register (or replace) the DLL entry under the DFS mutex.
// Regular-file path begins below: duplicate-name check, BOM strip (gz input
// reads 3 bytes into lbuf; plain input rewinds the stream when no BOM).
string[] slaves = dc.Slaves.SlaveList.Split(';'); MySpace.DataMining.Threading.ThreadTools<string>.Parallel( new Action<string>( delegate(string slave) { string netpath = Surrogate.NetworkPathForHost(slave); string cacpath = netpath + @"\" + dfs.DLL_DIR_NAME; try { System.IO.Directory.CreateDirectory(cacpath); } catch { } System.IO.File.Copy(localpath, cacpath + @"\" + dfspath, true); }), slaves); using (LockDfsMutex()) // Needed: change between load & save should be atomic. { dc = LoadDfsConfig(); // Reload in case of changes during put. dfs.DfsFile dfsfile = dc.Find(dfspath, DfsFileTypes.DLL); // Error if not dll, otherwise fine to replace. if (null == dfsfile) { dfsfile = new dfs.DfsFile(); dc.Files.Add(dfsfile); } dfsfile.Type = DfsFileTypes.DLL; dfsfile.Name = dfspath; dfsfile.Size = dllfi.Length; UpdateDfsXml(dc); } Console.WriteLine("dfs://{0} successfully written", dfspath); } else { for (int i = 0; i < dc.Files.Count; i++) { if (0 == string.Compare(dc.Files[i].Name, dfspath, true)) { Console.Error.WriteLine("Error: The specified file already exists in DFS: {0}", dfspath); SetFailure(); return; } } long sampledist = dc.DataNodeBaseSize / dc.DataNodeSamples; using (System.IO.FileStream _fs = new System.IO.FileStream(localpath, System.IO.FileMode.Open, System.IO.FileAccess.Read, System.IO.FileShare.Read)) { //const int MAX_SIZE_PER_RECEIVE = 0x400 * 64; //byte[] fbuf = new byte[MAX_SIZE_PER_RECEIVE]; byte[] fbuf = new byte[maxLineSize]; int[] lbuf = new int[3]; short lbufCount = 0; System.IO.Stream fs = _fs; if (localpath.EndsWith(".gz", StringComparison.OrdinalIgnoreCase)) { fs = new System.IO.Compression.GZipStream(_fs, System.IO.Compression.CompressionMode.Decompress); if (RecordLength < 1) { lbuf[2] = fs.ReadByte(); lbuf[1] = fs.ReadByte(); lbuf[0] = fs.ReadByte(); if (!(lbuf[2] == 0xEF && lbuf[1] == 0xBB && lbuf[0] == 0xBF)) { lbufCount = 3; } } } else { if (RecordLength < 1) { //remove BOM fs.Read(fbuf, 0, 3); if (!(fbuf[0] == 0xEF && fbuf[1] == 0xBB && 
// Pick healthy slaves, then loop: each iteration creates one new chunk file on
// the next slave in round-robin order (start chosen randomly), with an optional
// GZip wrapper on the chunk stream and a .zsa sample file for text input.
// NOTE(review): SlaveIP is computed below but appears unused in this method.
fbuf[2] == 0xBF)) { fs.Position = 0; } } } string[] slaves = dc.Slaves.SlaveList.Split(',', ';'); if (null == dc.Slaves.SlaveList || dc.Slaves.SlaveList.Length == 0 || slaves.Length < 1) { Console.Error.WriteLine("SlaveList expected in configuration (no machines)"); SetFailure(); return; } if (dc.Replication > 1) { slaves = ExcludeUnhealthySlaveMachines(slaves, true).ToArray(); } if (0 == slaves.Length) { Console.Error.WriteLine("No healthy machines for DFS put"); SetFailure(); return; } Random rnd = new Random((DateTime.Now.Millisecond / 2) + (System.Diagnostics.Process.GetCurrentProcess().Id / 2)); List<dfs.DfsFile.FileNode> ninfos = new List<dfs.DfsFile.FileNode>(64); int nextslave = rnd.Next() % slaves.Length; long curbytepos = 0; for (; ; ) { #if DEBUG if (RecordLength > 0) { if (lbufCount != 0) { // lbufCount should be zero here because BOM isn't used with rectangular records. throw new Exception("Internal error: (RecordLength > 0) && (lbufCount != 0)"); } } #endif string SlaveHost = slaves[nextslave]; string SlaveIP = IPAddressUtil.GetIPv4Address(SlaveHost); if (++nextslave >= slaves.Length) { nextslave = 0; } string netdir = NetworkPathForHost(SlaveHost); string chunkname = GenerateZdFileDataNodeName(dfspath); string chunkpath = netdir + @"\" + chunkname; string samplepath = netdir + @"\" + chunkname + ".zsa"; using (System.IO.FileStream _fc = new System.IO.FileStream(chunkpath, System.IO.FileMode.CreateNew, System.IO.FileAccess.Write, System.IO.FileShare.None, FILE_BUFFER_SIZE)) { System.IO.FileStream samps = null; if (RecordLength < 1) { samps = new System.IO.FileStream(samplepath, System.IO.FileMode.Create, System.IO.FileAccess.Write, System.IO.FileShare.None); } try { long chunkpos = 0; long nextsamplepos = 0; long Position = curbytepos; System.IO.Stream fc = _fc; if (1 == dc.slave.CompressDfsChunks) { fc = new System.IO.Compression.GZipStream(_fc, System.IO.Compression.CompressionMode.Compress); } MySpace.DataMining.DistributedObjects.Entry.ToBytes(4 + 
// Write the 12-byte chunk header (4-byte header size + 8-byte absolute start
// position), then fill the chunk: whole fixed-size records, or '\n'-delimited
// text bytes (pushed-back lbuf bytes first), sampling into the .zsa file.
// An empty chunk (Length == 0) ends the loop.
// NOTE(review): in the text path, bytes of a line beyond fbuf.Length (i.e.
// beyond maxLineSize) are consumed but never written — overlong lines are
// silently truncated; confirm this is the intended contract of maxLineSize.
// NOTE(review): "Length = curbytepos - Position;" appears twice in a row —
// the second assignment is redundant.
8, fbuf, 0); // Size of header. MySpace.DataMining.DistributedObjects.Entry.LongToBytes(Position, fbuf, 4); fc.Write(fbuf, 0, 4 + 8); { long chunkremain = dc.DataNodeBaseSize; long Length = 0; bool eof = false; while (chunkremain > 0 && !eof) { if (RecordLength > 0) { int recordremain = RecordLength; while (recordremain > 0) { int xread = recordremain; if (xread > fbuf.Length) { xread = fbuf.Length; } xread = fs.Read(fbuf, 0, xread); if (xread < 1) { eof = true; if (recordremain != RecordLength) { Console.Error.WriteLine("Warning: incomplete record at end of input file"); } break; } fc.Write(fbuf, 0, xread); chunkremain -= xread; recordremain -= xread; curbytepos += xread; #if DEBUG if (recordremain < 0) { throw new Exception("DEBUG: (recordremain < 0)"); } #endif } } else { int xread = 0; for (; ; ) { int ib; if (lbufCount == 0) { ib = fs.ReadByte(); } else { ib = lbuf[--lbufCount]; } if (-1 == ib) { eof = true; break; } if (xread < fbuf.Length) { fbuf[xread++] = (byte)ib; } if ('\n' == ib) { break; } } //Length += xread; chunkremain -= xread; curbytepos += xread; fc.Write(fbuf, 0, xread); chunkpos += xread; if (chunkpos >= nextsamplepos) { samps.Write(fbuf, 0, xread); nextsamplepos += sampledist; } } } Length = curbytepos - Position; if (0 == Length) { break; } Length = curbytepos - Position; { dfs.DfsFile.FileNode fnode = new dfs.DfsFile.FileNode(); fnode.Host = SlaveHost; fnode.Position = Position; fnode.Length = Length; fnode.Name = chunkname; ninfos.Add(fnode); } } fc.Close(); } finally { if (null != samps) { samps.Dispose(); } } } } string dfspathreplicating = ".$" + dfspath + ".$replicating-" + Guid.NewGuid().ToString(); using (LockDfsMutex()) // Needed: change between load & save should be atomic. { dc = LoadDfsConfig(); // Reload in case of changes during put. 
// Commit the entry under the temporary replicating name (with XFileType set
// for rectangular files), replicate, then rename to the final DFS name.
if (null != dc.FindAny(dfspathreplicating)) { Console.Error.WriteLine("Error: file exists: file put into DFS from another location during put: " + dfspathreplicating); SetFailure(); return; } dfs.DfsFile dfsfile = new dfs.DfsFile(); //dfsfile.Nodes = new List<dfs.DfsFile.FileNode>(ninfos); if (RecordLength > 0) { dfsfile.XFileType = DfsFileTypes.BINARY_RECT + "@" + RecordLength.ToString(); } dfsfile.Nodes = ninfos; dfsfile.Name = dfspathreplicating; dfsfile.Size = curbytepos; dc.Files.Add(dfsfile); UpdateDfsXml(dc); } fs.Close(); ReplicationPhase(dfspathreplicating, true, 0, slaves); using (LockDfsMutex()) // Needed: change between load & save should be atomic. { dc = LoadDfsConfig(); // Reload in case of changes during put. dfs.DfsFile dfu = dc.FindAny(dfspathreplicating); if (null != dfu) { if (null != DfsFindAny(dc, dfspath)) { Console.Error.WriteLine("Error: file exists: file put into DFS from another location during put"); SetFailure(); return; } dfu.Name = dfspath; UpdateDfsXml(dc); } } Console.WriteLine("Sent {0} bytes to file dfs://{1}", curbytepos, dfspath); } } } } }
/// <summary>
/// Executes a "remote" job across the cluster: one worker block per
/// IOSettings/DFS_IO entry, each bound to a slave machine. After all block
/// threads finish, any DFS output is committed to dfs.xml under temporary
/// ".$...$replicating-" names, replicated, and then renamed to the final names.
/// </summary>
/// <param name="cfgj">Job configuration; IOSettings drives block count and I/O wiring.</param>
/// <param name="ExecArgs">User arguments forwarded to every remote block.</param>
/// <param name="verbose">If true, writes progress information to the console.</param>
/// <param name="verbosereplication">Forwarded to ReplicationPhase for verbose replication output.</param>
public static void ExecOneRemote(SourceCode.Job cfgj, string[] ExecArgs, bool verbose, bool verbosereplication)
{
    if (verbose)
    {
        // NOTE(review): the format string only uses {0} and {2}; the millisecond
        // argument ({1}) is passed but never printed — confirm intent.
        Console.WriteLine("[{0}] [Remote: {2}]", System.DateTime.Now.ToString(), System.DateTime.Now.Millisecond, cfgj.NarrativeName);
    }
    // Per-job user log file name; unique via GUID and tagged with the job ID (sjid).
    string logname = Surrogate.SafeTextPath(cfgj.NarrativeName) + "_" + Guid.NewGuid().ToString() + ".j" + sjid + "_log.txt";
    //System.Threading.Thread.Sleep(8000);
    /*if (cfgj.IOSettings.DFS_IOs == null || cfgj.IOSettings.DFS_IOs.Length == 0) { Console.Error.WriteLine("One or more IOSettings/DFS_IO needed in configuration for 'remote'"); return; }*/
    // Could provide BlockID here, which is just the n-th DFS_IO entry.
    //cfgj.Remote
    dfs dc = LoadDfsConfig();
    string[] slaves = dc.Slaves.SlaveList.Split(',', ';');
    if (dc.Slaves.SlaveList.Length == 0 || slaves.Length < 1)
    {
        throw new Exception("SlaveList expected in " + dfs.DFSXMLNAME);
    }
    if (dc.Replication > 1)
    {
        // With replication enabled, drop unhealthy machines; refuse to run if we
        // lost at least as many machines as the replication factor.
        string[] slavesbefore = slaves;
        slaves = ExcludeUnhealthySlaveMachines(slaves, true).ToArray();
        if (slavesbefore.Length - slaves.Length >= dc.Replication)
        {
            throw new Exception("Not enough healthy machines to run job (hit replication count)");
        }
    }
    if (cfgj.IOSettings.DFS_IO_Multis != null)
    {
        // Expand DFS_IO_Multi templates into concrete DFS_IO entries.
        cfgj.ExpandDFSIOMultis(slaves.Length, MySpace.DataMining.DistributedObjects.MemoryUtils.NumberOfProcessors);
    }
    // Upper-cased host name -> slave index, for DFS_IO entries with explicit Host.
    Dictionary<string, int> slaveIDs = new Dictionary<string, int>();
    for (int si = 0; si < slaves.Length; si++)
    {
        slaveIDs.Add(slaves[si].ToUpper(), si);
    }
    bool aborting = false;
    try
    {
        List<RemoteBlockInfo> blocks = new List<RemoteBlockInfo>(cfgj.IOSettings.DFS_IOs.Length);
        if (verbose)
        {
            Console.WriteLine("{0} processes on {1} machines:", cfgj.IOSettings.DFS_IOs.Length, slaves.Length);
        }
        // Network share path per slave; unreachable slaves are reported and skipped.
        List<string> outputdfsdirs = new List<string>(slaves.Length);
        {
            for (int i = 0; i < slaves.Length; i++)
            {
                try
                {
                    outputdfsdirs.Add(NetworkPathForHost(slaves[i]));
                }
                catch (Exception e)
                {
                    Console.Error.WriteLine(" {0}", e.Message);
                }
            }
        }
        // Serialize the cluster config and extract just the <slave> element to ship
        // to each machine as slaveconfig.j<jid>.xml.
        string slaveconfigxml = "";
        {
            System.Xml.XmlDocument pdoc = new System.Xml.XmlDocument();
            {
                System.IO.MemoryStream ms = new System.IO.MemoryStream();
                System.Xml.Serialization.XmlSerializer xs = new System.Xml.Serialization.XmlSerializer(typeof(dfs));
                xs.Serialize(ms, dc);
                ms.Seek(0, System.IO.SeekOrigin.Begin);
                pdoc.Load(ms);
            }
            string xml = pdoc.DocumentElement.SelectSingleNode("./slave").OuterXml;
            //System.Threading.Thread.Sleep(8000);
            slaveconfigxml = xml;
        }
        { // Temporary:
            // Write slaveconfig.j<jid>.xml to every slave, serialized per-host by a
            // named mutex; write failures are deliberately ignored (best effort).
            for (int si = 0; si < slaves.Length; si++)
            {
                System.Threading.Mutex m = new System.Threading.Mutex(false, "AEL_SC_" + slaves[si]);
                try
                {
                    m.WaitOne();
                }
                catch (System.Threading.AbandonedMutexException)
                {
                }
                try
                {
                    System.IO.File.WriteAllText(NetworkPathForHost(slaves[si]) + @"\slaveconfig.j" + sjid + ".xml", slaveconfigxml);
                }
                catch
                {
                }
                finally
                {
                    m.ReleaseMutex();
                    m.Close();
                }
            }
        }
        // Randomized round-robin starting slave for blocks that don't pin a Host.
        int nextslave = (new Random(DateTime.Now.Millisecond / 2 + System.Diagnostics.Process.GetCurrentProcess().Id / 2)).Next() % slaves.Length;
        int hosttypes = 0; // 0 = undecided; 1 = all implicit hosts; 2 = all explicit hosts.
        List<int> outputrecordlengths = new List<int>();
        List<int> inputrecordlengths = new List<int>();
        for (int BlockID = 0; BlockID < cfgj.IOSettings.DFS_IOs.Length; BlockID++)
        {
            int slaveHostID = 0;
            RemoteBlockInfo bi = new RemoteBlockInfo();
            bi.sampledist = dc.DataNodeBaseSize / dc.DataNodeSamples;
            bi.BlockID = BlockID;
            bi.blockcount = cfgj.IOSettings.DFS_IOs.Length;
            if (string.IsNullOrEmpty(cfgj.IOSettings.DFS_IOs[BlockID].Host))
            {
                // Host unspecified: assign round-robin. Mixing specified and
                // unspecified hosts across DFS_IO entries is an error.
                if (0 != hosttypes && 1 != hosttypes)
                {
                    throw new Exception("DFS_IO/Host tag must be specified for all or none");
                }
                hosttypes = 1;
                bi.SlaveHost = slaves[nextslave];
                slaveHostID = nextslave;
                bi.explicithost = false;
            }
            else
            {
                if (0 != hosttypes && 2 != hosttypes)
                {
                    throw new Exception("DFS_IO/Host tag must be specified for all or none");
                }
                hosttypes = 2;
                bi.SlaveHost = cfgj.IOSettings.DFS_IOs[BlockID].Host;
                slaveHostID = slaveIDs[bi.SlaveHost.ToUpper()];
                bi.explicithost = true;
            }
            bi.ExecArgs = ExecArgs;
            if (++nextslave >= slaves.Length)
            {
                nextslave = 0;
            }
            bi.logname = logname;
            bi.outputdfsdirs = outputdfsdirs;
            bi.slaves = slaves;
            bi.baseoutputfilesize = dc.DataNodeBaseSize;
            bi.cfgj = cfgj;
            bi.DFSWriter = cfgj.IOSettings.DFS_IOs[BlockID].DFSWriter.Trim();
            bi.Meta = cfgj.IOSettings.DFS_IOs[BlockID].Meta;
            // Parse the semicolon-separated DFSWriter list; each entry may carry a
            // record-length suffix after '@' (rectangular binary output).
            List<string> dfswriters = new List<string>();
            if (bi.DFSWriter.Length > 0)
            {
                string[] writers = bi.DFSWriter.Split(';');
                for (int wi = 0; wi < writers.Length; wi++)
                {
                    string thiswriter = writers[wi].Trim();
                    if (thiswriter.Length == 0)
                    {
                        continue;
                    }
                    int ic = thiswriter.IndexOf('@');
                    int reclen = -1; // -1 = no fixed record length.
                    if (-1 != ic)
                    {
                        try
                        {
                            reclen = Surrogate.GetRecordSize(thiswriter.Substring(ic + 1));
                            thiswriter = thiswriter.Substring(0, ic);
                        }
                        catch (FormatException e)
                        {
                            Console.Error.WriteLine("Error: remote output record length error: {0} ({1})", thiswriter, e.Message);
                            SetFailure();
                            return;
                        }
                        catch (OverflowException e)
                        {
                            Console.Error.WriteLine("Error: remote output record length error: {0} ({1})", thiswriter, e.Message);
                            SetFailure();
                            return;
                        }
                    }
                    string outfn = thiswriter;
                    if (outfn.StartsWith(@"dfs://", StringComparison.OrdinalIgnoreCase))
                    {
                        outfn = outfn.Substring(6);
                    }
                    string reason = "";
                    if (dfs.IsBadFilename(outfn, out reason))
                    {
                        Console.Error.WriteLine("Invalid output file: {0}", reason);
                        return;
                    }
                    if (null != DfsFindAny(dc, outfn))
                    {
                        Console.Error.WriteLine("Error: output file already exists in DFS: {0}", outfn);
                        return;
                    }
                    dfswriters.Add(thiswriter);
                    outputrecordlengths.Add(reclen);
                }
            }
            else
            {
                // No writer configured: keep list positions aligned with a blank entry.
                dfswriters.Add("");
                outputrecordlengths.Add(-1);
            }
            bi.DFSWriters = dfswriters;
            bi.verbose = verbose;
            bi.rem = new MySpace.DataMining.DistributedObjects5.Remote(cfgj.NarrativeName + "_remote");
            bi.rem.CookRetries = dc.slave.CookRetries;
            bi.rem.CookTimeout = dc.slave.CookTimeout;
            bi.rem.DfsSampleDistance = bi.sampledist;
            bi.rem.CompressFileOutput = dc.slave.CompressDfsChunks;
            bi.rem.LocalCompile = true;
            bi.rem.OutputStartingPoint = slaveHostID;
            bi.rem.CompilerOptions = cfgj.IOSettings.CompilerOptions;
            bi.rem.CompilerVersion = cfgj.IOSettings.CompilerVersion;
            if (cfgj.AssemblyReferencesCount > 0)
            {
                cfgj.AddAssemblyReferences(bi.rem.CompilerAssemblyReferences, Surrogate.NetworkPathForHost(dc.Slaves.GetFirstSlave()));
            }
            if (cfgj.OpenCVExtension != null)
            {
                bi.rem.AddOpenCVExtension();
            }
            if (cfgj.MemCache != null)
            {
                bi.rem.AddMemCacheExtension();
            }
            if (cfgj.Unsafe != null)
            {
                bi.rem.AddUnsafe();
            }
            {
                // Resolve this block's DFSReader input paths to concrete DFS file nodes.
                List<dfs.DfsFile.FileNode> nodes = new List<dfs.DfsFile.FileNode>();
                List<string> mapfileswithnodes = null;
                List<int> nodesoffsets = null;
                IList<string> mapfiles = SplitInputPaths(dc, cfgj.IOSettings.DFS_IOs[BlockID].DFSReader);
                if (mapfiles.Count > 0)
                {
                    mapfileswithnodes = new List<string>(mapfiles.Count);
                    nodesoffsets = new List<int>(mapfiles.Count);
                }
                for (int i = 0; i < mapfiles.Count; i++)
                {
                    string dp = mapfiles[i].Trim();
                    int inreclen = -1;
                    if (0 != dp.Length) // Allow empty entry where input isn't wanted.
                    {
                        if (dp.StartsWith("dfs://", StringComparison.OrdinalIgnoreCase))
                        {
                            dp = dp.Substring(6);
                        }
                        {
                            // Optional record-length suffix on the input path.
                            int ic = dp.IndexOf('@');
                            if (-1 != ic)
                            {
                                try
                                {
                                    inreclen = Surrogate.GetRecordSize(dp.Substring(ic + 1));
                                    dp = dp.Substring(0, ic);
                                }
                                catch (FormatException e)
                                {
                                    Console.Error.WriteLine("Error: remote input record length error: {0} ({1})", dp, e.Message);
                                    SetFailure();
                                    return;
                                }
                                catch (OverflowException e)
                                {
                                    Console.Error.WriteLine("Error: remote input record length error: {0} ({1})", dp, e.Message);
                                    SetFailure();
                                    return;
                                }
                            }
                        }
                        dfs.DfsFile df;
                        if (inreclen > 0 || inreclen == -2)
                        {
                            // Fixed-length input must be BINARY_RECT and match the file's record length.
                            df = DfsFind(dc, dp, DfsFileTypes.BINARY_RECT);
                            if (null != df && inreclen != df.RecordLength)
                            {
                                Console.Error.WriteLine("Error: remote input file does not have expected record length of {0}: {1}@{2}", inreclen, dp, df.RecordLength);
                                SetFailure();
                                return;
                            }
                        }
                        else
                        {
                            df = DfsFind(dc, dp);
                        }
                        if (null == df)
                        {
                            //throw new Exception("Remote input file not found in DFS: " + dp);
                            Console.Error.WriteLine("Remote input file not found in DFS: {0}", dp);
                            return;
                        }
                        if (df.Nodes.Count > 0)
                        {
                            mapfileswithnodes.Add(dp);
                            nodesoffsets.Add(nodes.Count); // Offset of this file's first node in the combined list.
                            inputrecordlengths.Add(inreclen);
                            nodes.AddRange(df.Nodes);
                        }
                    }
                }
                bi.dfsinputpaths = new List<string>(nodes.Count);
                //MapNodesToNetworkPaths(nodes, bi.dfsinputpaths);
                dfs.MapNodesToNetworkStarPaths(nodes, bi.dfsinputpaths);
                bi.dfsinputfilenames = mapfileswithnodes;
                bi.dfsinputnodesoffsets = nodesoffsets;
            }
            blocks.Add(bi);
            bi.thread = new System.Threading.Thread(new System.Threading.ThreadStart(bi.threadproc));
            bi.thread.Name = "RemoteJobBlock" + bi.BlockID;
        }
        MySpace.DataMining.DistributedObjects.StaticGlobals.DSpace_InputRecordLength = inputrecordlengths.Count > 0 ? inputrecordlengths[0] : -1;
        MySpace.DataMining.DistributedObjects.StaticGlobals.DSpace_OutputRecordLength = outputrecordlengths.Count > 0 ? outputrecordlengths[0] : -1;
        // Need to start threads separately due to StaticGlobals being updated.
        for (int BlockID = 0; BlockID < cfgj.IOSettings.DFS_IOs.Length; BlockID++)
        {
            RemoteBlockInfo bi = blocks[BlockID];
            bi.rem.InputRecordLength = MySpace.DataMining.DistributedObjects.StaticGlobals.DSpace_InputRecordLength;
            bi.rem.InputRecordLengths = inputrecordlengths;
            bi.rem.OutputRecordLength = MySpace.DataMining.DistributedObjects.StaticGlobals.DSpace_OutputRecordLength;
            bi.rem.OutputRecordLengths = outputrecordlengths;
            AELight_StartTraceThread(bi.thread);
        }
        // Join all block threads; failed blocks are reported but don't stop the others.
        for (int BlockID = 0; BlockID < blocks.Count; BlockID++)
        {
            AELight_JoinTraceThread(blocks[BlockID].thread);
            blocks[BlockID].rem.Close();
            if (blocks[BlockID].blockfail)
            {
                Console.Error.WriteLine("BlockID {0} on host '{1}' did not complete successfully", BlockID, (blocks[BlockID].SlaveHost != null) ? blocks[BlockID].SlaveHost : "<null>");
                continue;
            }
        }
        List<string> dfsnames = new List<string>();
        List<string> dfsnamesreplicating = new List<string>();
        // Reload DFS config to make sure changes since starting get rolled in, and make sure the output file wasn't created in that time...
        using (LockDfsMutex()) // Needed: change between load & save should be atomic.
        {
            dc = LoadDfsConfig();
            for (int BlockID = 0; BlockID < blocks.Count; BlockID++)
            {
                if (blocks[BlockID].blockfail)
                {
                    continue;
                }
                {
                    bool anyoutput = false;
                    bool nonemptyoutputpath = false;
                    for (int oi = 0; oi < blocks[BlockID].DFSWriters.Count; oi++)
                    {
                        string dfswriter = blocks[BlockID].DFSWriters[oi];
                        if (string.IsNullOrEmpty(dfswriter))
                        {
                            if (blocks[BlockID].outputdfsnodeses[oi].Count > 0)
                            {
                                Console.Error.WriteLine("Output data detected with no DFSWriter specified");
                            }
                        }
                        else
                        {
                            {
                                if (null != DfsFind(dc, dfswriter))
                                {
                                    Console.Error.WriteLine("Error: output file was created during job: {0}", dfswriter);
                                    continue;
                                }
                                string dfspath = dfswriter;
                                {
                                    nonemptyoutputpath = true;
                                    dfs.DfsFile df = new dfs.DfsFile();
                                    if (blocks[BlockID].rem.OutputRecordLengths[oi] > 0)
                                    {
                                        df.XFileType = DfsFileTypes.BINARY_RECT + "@" + blocks[BlockID].rem.OutputRecordLengths[oi].ToString();
                                    }
                                    else if (blocks[BlockID].rem.OutputRecordLengths[oi] == -2)
                                    {
                                        df.XFileType = DfsFileTypes.BINARY_RECT + "@?";
                                    }
                                    df.Nodes = new List<dfs.DfsFile.FileNode>();
                                    df.Size = -1; // Preset
                                    if (dfspath.StartsWith("dfs://", StringComparison.OrdinalIgnoreCase))
                                    {
                                        dfspath = dfspath.Substring(6);
                                    }
                                    // Register under a temporary "replicating" name first; renamed
                                    // to the final name only after ReplicationPhase completes below.
                                    string dfspathreplicating = ".$" + dfspath + ".$replicating-" + Guid.NewGuid().ToString();
                                    if (null != dc.FindAny(dfspathreplicating))
                                    {
                                        Console.Error.WriteLine("Error: file exists: file put into DFS from another location during job: " + dfspathreplicating);
                                        SetFailure();
                                        return;
                                    }
                                    dfsnames.Add(dfspath);
                                    dfsnamesreplicating.Add(dfspathreplicating);
                                    df.Name = dfspathreplicating;
                                    bool anybad = false;
                                    long totalsize = 0;
                                    {
                                        int i = BlockID;
                                        for (int j = 0; j < blocks[i].outputdfsnodeses[oi].Count; j++)
                                        {
                                            dfs.DfsFile.FileNode fn = new dfs.DfsFile.FileNode();
                                            fn.Host = blocks[i].slaves[(blocks[i].rem.OutputStartingPoint + j) % blocks[i].slaves.Count];
                                            fn.Name = blocks[i].outputdfsnodeses[oi][j];
                                            df.Nodes.Add(fn);
                                            fn.Length = -1; // Preset
                                            fn.Position = -1; // Preset
                                            if (anybad)
                                            {
                                                continue;
                                            }
                                            fn.Length = blocks[i].outputsizeses[oi][j];
                                            fn.Position = totalsize; // Position must be set before totalsize updated!
                                            if (blocks[i].outputdfsnodeses[oi].Count != blocks[i].outputsizeses[oi].Count)
                                            {
                                                anybad = true;
                                                continue;
                                            }
                                            totalsize += blocks[i].outputsizeses[oi][j];
                                        }
                                    }
                                    if (!anybad)
                                    {
                                        df.Size = totalsize;
                                    }
                                    if (totalsize != 0)
                                    {
                                        anyoutput = true;
                                    }
                                    // Always add the file to DFS, even if blank!
                                    dc.Files.Add(df);
                                }
                            }
                        }
                    }
                    if (!anyoutput && verbose && nonemptyoutputpath)
                    {
                        Console.Write(" (no DFS output) ");
                        ConsoleFlush();
                    }
                }
            }
            UpdateDfsXml(dc);
        }
        ReplicationPhase(verbosereplication, blocks.Count, slaves, dfsnamesreplicating);
        using (LockDfsMutex()) // Needed: change between load & save should be atomic.
        {
            dc = LoadDfsConfig(); // Reload in case of change or user modifications.
            // Rename each temporary "replicating" file to its final name, unless a
            // file with the final name appeared while the job ran.
            for (int nfile = 0; nfile < dfsnames.Count; nfile++)
            {
                string dfspath = dfsnames[nfile];
                string dfspathreplicating = dfsnamesreplicating[nfile];
                {
                    dfs.DfsFile dfu = dc.FindAny(dfspathreplicating);
                    if (null != dfu)
                    {
                        if (null != DfsFindAny(dc, dfspath))
                        {
                            Console.Error.WriteLine("Error: file exists: file put into DFS from another location during job");
                            SetFailure();
                            continue;
                        }
                        dfu.Name = dfspath;
                    }
                }
            }
            UpdateDfsXml(dc);
        }
        if (verbose)
        {
            Console.WriteLine(); // Line after output chars.
        }
    }
    catch (System.Threading.ThreadAbortException)
    {
        aborting = true;
    }
    finally
    {
        {
            // Best-effort cleanup of the per-slave slaveconfig files written above,
            // under the same per-host named mutex.
            for (int si = 0; si < slaves.Length; si++)
            {
                System.Threading.Mutex m = new System.Threading.Mutex(false, "AEL_SC_" + slaves[si]);
                try
                {
                    m.WaitOne();
                }
                catch (System.Threading.AbandonedMutexException)
                {
                }
                try
                {
                    System.IO.File.Delete(NetworkPathForHost(slaves[si]) + @"\slaveconfig.j" + sjid + ".xml");
                }
                catch
                {
                }
                finally
                {
                    m.ReleaseMutex();
                    m.Close();
                }
            }
        }
        if (!aborting)
        {
            CheckUserLogs(slaves, logname);
        }
    }
    if (verbose)
    {
        Console.WriteLine();
        // NOTE(review): format only uses {0}; the millisecond argument is unused.
        Console.WriteLine("[{0}] Done", System.DateTime.Now.ToString(), System.DateTime.Now.Millisecond);
        for (int i = 0; i < cfgj.IOSettings.DFS_IOs.Length; i++)
        {
            Console.WriteLine("Output: {0}", cfgj.IOSettings.DFS_IOs[i].DFSWriter);
        }
    }
}
/// <summary>
/// Gathers output file nodes and sizes from hostToESRBlocks entries that are
/// not yet in replicatingoutput mode, registers each output file in dfs.xml
/// under a temporary ".$...$replicating-" name (saved under the DFS mutex),
/// and returns the newly created DfsFile entries.
/// </summary>
/// <returns>The DfsFile entries added to the DFS configuration (still under temporary names).</returns>
internal List<dfs.DfsFile> GetOutputFilesFromESRBlocks()
{
    //gather output file nodes and sizes from hostToESRBlocks which are not yet in replicatingoutput mode.
    //update dfs with temp filename, save dfs.
    List<dfs.DfsFile> files = new List<dfs.DfsFile>();
    lock (hostToESRBlocks)
    {
        //scramble the esrblocks ordering here.
        // Collect blocks not yet replicating, and mark them so a later call
        // does not pick them up a second time.
        List<MapReduceBlockInfo> scrambled = new List<MapReduceBlockInfo>();
        foreach (KeyValuePair<string, List<MapReduceBlockInfo>> pair in hostToESRBlocks)
        {
            foreach (MapReduceBlockInfo block in pair.Value)
            {
                if (!block.replicatingoutput)
                {
                    scrambled.Add(block);
                    block.replicatingoutput = true;
                }
            }
        }
        // Random pair-swap pass over the list (note: not a uniform
        // Fisher-Yates shuffle, but sufficient to scramble ordering).
        for (int bi = 0; bi < scrambled.Count; bi++)
        {
            int ind = rnd.Next() % scrambled.Count;
            MapReduceBlockInfo oldvalue = scrambled[bi];
            scrambled[bi] = scrambled[ind];
            scrambled[ind] = oldvalue;
        }
#if FAILOVER_DEBUG
        Log("esrblocks going into replication=" + scrambled.Count.ToString());
#endif
        if (scrambled.Count > 0)
        {
            using (LockDfsMutex()) // Needed: change between load & save should be atomic.
            {
                // Reload DFS config to make sure changes since starting get rolled in, and make sure the output file wasn't created in that time...
                dfs dc = LoadDfsConfig(); // Reload in case of change or user modifications.
                for (int nfile = 0; nfile < failoverShared.outputfiles.Count; nfile++)
                {
                    dfs.DfsFile df = new dfs.DfsFile();
                    string ofile = failoverShared.outputfiles[nfile];
                    if (ofile.Length == 0)
                    {
                        // Empty slot: no output wanted for this writer index.
                        continue;
                    }
                    // Record length > 0 or -2 ("@?") marks rectangular binary output.
                    if (failoverShared.outputrecordlengths[nfile] > 0)
                    {
                        df.XFileType = DfsFileTypes.BINARY_RECT + "@" + failoverShared.outputrecordlengths[nfile].ToString();
                    }
                    else if (failoverShared.outputrecordlengths[nfile] == -2)
                    {
                        df.XFileType = DfsFileTypes.BINARY_RECT + "@?";
                    }
                    df.Nodes = new List<dfs.DfsFile.FileNode>();
                    df.Size = -1; // Preset
                    string dfsname = ofile;
                    if (dfsname.StartsWith("dfs://", StringComparison.OrdinalIgnoreCase))
                    {
                        dfsname = dfsname.Substring(6);
                    }
                    // Temporary name used until the replication phase completes.
                    string dfsnamereplicating = ".$" + dfsname + ".$replicating-" + Guid.NewGuid().ToString();
                    df.Name = dfsnamereplicating;
                    if (null != DfsFind(dc, df.Name))
                    {
                        Console.Error.WriteLine("Error: output file '{0}' was created during job: " + df.Name, ofile);
                        continue;
                    }
                    if (!dfsnamesToReplnames.ContainsKey(dfsname))
                    {
                        dfsnamesToReplnames.Add(dfsname, new List<string>(failoverShared.dc.Replication));
                    }
                    dfsnamesToReplnames[dfsname].Add(dfsnamereplicating);
                    files.Add(df);
                    // Stitch this output's chunks together from every block, computing
                    // each node's byte position and the file's total size.
                    long totalsize = 0;
                    bool anybad = false;
                    bool foundzero = false;
                    foreach (MapReduceBlockInfo block in scrambled)
                    {
                        List<string> nodes = block.reduceoutputdfsnodeses[nfile];
                        List<long> sizes = block.reduceoutputsizeses[nfile];
                        if (nodes.Count != sizes.Count)
                        {
                            Console.Error.WriteLine("Warning: chunk accounting error");
                        }
                        for (int j = 0; j < nodes.Count; j++)
                        {
                            dfs.DfsFile.FileNode fn = new dfs.DfsFile.FileNode();
                            fn.Host = block.SlaveHost;
                            fn.Name = nodes[j];
                            df.Nodes.Add(fn);
                            fn.Length = -1; // Preset
                            fn.Position = -1; // Preset
                            if (anybad)
                            {
                                continue;
                            }
                            fn.Position = totalsize; // Position must be set before totalsize updated!
                            if (j >= sizes.Count)
                            {
                                Console.Error.WriteLine("Warning: size not provided for data node chunk from host " + fn.Host);
                                anybad = true;
                                continue;
                            }
                            if (0 == sizes[j])
                            {
                                // Only warn once per output file about zero-size chunks.
                                if (!foundzero)
                                {
                                    foundzero = true;
                                    Console.Error.WriteLine("Warning: zero-size data node chunk encountered from host " + fn.Host);
                                }
                            }
                            fn.Length = sizes[j];
                            totalsize += sizes[j];
                        }
                    }
                    if (!anybad)
                    {
                        df.Size = totalsize;
                    }
                    //Always produce output file, even if no data.
                    dc.Files.Add(df);
                }
                UpdateDfsXml(dc);
            }
        }
    }
#if FAILOVER_DEBUG
    {
        string debugtxt = "getoutputfilesfromesrblocks:" + files.Count.ToString() + Environment.NewLine;
        foreach (dfs.DfsFile file in files)
        {
            debugtxt += file.Name + "; nodes: " + file.Nodes.Count.ToString() + Environment.NewLine;
            foreach (dfs.DfsFile.FileNode fn in file.Nodes)
            {
                debugtxt += fn.Host + @"\" + fn.Name + Environment.NewLine;
            }
        }
        Log(debugtxt);
    }
#endif
    return files;
}
/// <summary>
/// Writes a jobs (source code) file into DFS under <paramref name="dfspath"/>.
/// The content is written to a data-node file on the surrogate (master) host,
/// mirrored to the meta-backup location when configured, and then registered
/// in dfs.xml under the DFS mutex.
/// </summary>
/// <param name="dfspath">Target DFS path; a leading "dfs://" prefix is allowed.</param>
/// <param name="filecontent">Full text content of the jobs file.</param>
/// <returns>false if the jobs file already exists, the name is invalid, or the
/// name was taken concurrently; true otherwise.</returns>
/// <exception cref="Exception">Wraps any failure writing the meta-backup copy.</exception>
public static bool DfsPutJobsFileContent(string dfspath, string filecontent)
{
    dfs dc = LoadDfsConfig();
    if (null != DfsFind(dc, dfspath, DfsFileTypes.JOB))
    {
        //throw new Exception("Unable to write jobs file: jobs file already exists");
        return false;
    }
    string newactualfilehost;
    string newactualfilename;
    string newprettyfilename; // Pretty without dfs://
    string ActualFile;
    //+++metabackup+++
    string backupdir = dc.GetMetaBackupLocation();
    //---metabackup---
    newprettyfilename = dfspath;
    {
        if (newprettyfilename.StartsWith("dfs://", StringComparison.OrdinalIgnoreCase))
        {
            newprettyfilename = newprettyfilename.Substring(6);
        }
    }
    string reason = "";
    if (dfs.IsBadFilename(newprettyfilename, out reason))
    {
        Console.Error.WriteLine("Invalid output file: {0}", reason);
        SetFailure();
        return false;
    }
    {
        //string[] slaves = dc.Slaves.SlaveList.Split(';');
        //newactualfilehost = slaves[(new Random()).Next() % slaves.Length];
        // Jobs files always live on the surrogate (master) host.
        newactualfilehost = Surrogate.MasterHost;
        newactualfilename = GenerateZdFileDataNodeName(newprettyfilename);
    }
    ActualFile = NetworkPathForHost(newactualfilehost) + @"\" + newactualfilename;
    System.IO.File.WriteAllText(ActualFile, filecontent);
    //+++metabackup+++
    // Since this doesn't even exist in dfs.xml yet,
    // writing to the actual jobs file doesn't need to be transactional.
    if (null != backupdir)
    {
        try
        {
            string backupfile = backupdir + @"\" + newactualfilename;
            System.IO.File.WriteAllText(backupfile, filecontent);
        }
        catch (Exception eb)
        {
            LogOutputToFile(eb.ToString());
            throw new Exception("Error writing backup: " + eb.Message, eb);
        }
    }
    //---metabackup---
    System.IO.FileInfo finfo = new System.IO.FileInfo(ActualFile);
    // Need to add to DFS if it exists;
    // if it doesn't exist, the user probably just canceled it.
    if (finfo.Exists)
    {
        dfs.DfsFile df = new dfs.DfsFile();
        df.Nodes = new List<dfs.DfsFile.FileNode>(1);
        {
            dfs.DfsFile.FileNode fnode = new dfs.DfsFile.FileNode();
            fnode.Host = newactualfilehost;
            fnode.Position = 0;
            fnode.Length = finfo.Length;
            fnode.Name = newactualfilename;
            df.Nodes.Add(fnode);
        }
        df.Name = newprettyfilename;
        df.Size = finfo.Length;
        df.Type = DfsFileTypes.JOB;
        using (LockDfsMutex())
        {
            dc = LoadDfsConfig(); // Reload in case of intermediate change.
            // Bug fix: race check must look up the DFS file name, not the host
            // name. The previous code passed newactualfilehost here, so a file
            // with the same pretty name created concurrently was never detected.
            if (null != DfsFindAny(dc, newprettyfilename))
            {
                Console.Error.WriteLine("Output file was created while editing");
                SetFailure();
                return false;
            }
            dc.Files.Add(df);
            UpdateDfsXml(dc); // !
        }
    }
    return true; // !
}
static void AELightRun(string[] args) { //if (Environment.GetEnvironmentVariable("DOSERVICE") != null) { Console.OutputEncoding = Encoding.UTF8; } try { isdspace = null != Environment.GetEnvironmentVariable("DSPACE_EXE"); } catch { } if (isdspace) { appname = "Qizmt"; } else { appname = "AELight"; } if (args.Length >= 1 && args[0].StartsWith("-$")) { appname = "Qizmt"; isdspace = true; int del = args[0].IndexOf(@"\", 4); if (del > -1) { userdomain = args[0].Substring(4, del - 4); douser = args[0].Substring(del + 1); } else { userdomain = Environment.UserDomainName; douser = args[0].Substring(2); } del = douser.IndexOf("@"); if (del > -1) { dousername = douser.Substring(0, del); } else { dousername = douser; } args = SubArray(args, 1); } else { userdomain = Environment.UserDomainName; dousername = Environment.UserName; douser = Environment.UserName + "@" + System.Net.Dns.GetHostName(); } OriginalUserDir = Environment.CurrentDirectory; AELight_Dir = System.IO.Path.GetDirectoryName(System.Reflection.Assembly.GetExecutingAssembly().Location); jidfp = AELight_Dir + @"\" + jidfn; if (args.Length > 0 && args[0].StartsWith("-debug")) { DebugSwitch = true; DebugStepSwitch = ("-debug-step" == args[0]); #if DEBUG //Console.WriteLine("DEBUG: DebugSwitch = true"); #endif args = SubArray(args, 1); } #if DEBUG if (args.Length > 0 && "-ddebug" == args[0]) { args = SubArray(args, 1); System.Diagnostics.Debugger.Launch(); } #endif if (0 == args.Length) { ShowUsage(); return; } string act = args[0].ToLower(); if (act.StartsWith("-")) { act = act.Substring(1); } { using (System.Threading.Mutex lm = new System.Threading.Mutex(false, "DOexeclog")) { lm.WaitOne(); // Lock also taken by kill. 
if (System.IO.File.Exists(AELight_Dir + @"\excl.dat")) { bool isadminlocked = false; if (doexcl) { isadminlocked = false; } else { string[] lines = System.IO.File.ReadAllLines(AELight_Dir + @"\excl.dat"); if (lines.Length > 0 && null != lines[0]) { if (lines[0].StartsWith("persist=", StringComparison.OrdinalIgnoreCase)) { string adminlockuser = lines[0].Substring(8); if (0 == CompareUsers(adminlockuser, douser)) { isadminlocked = false; } } } } if (isadminlocked) { lm.ReleaseMutex(); Console.Error.WriteLine(" {0} is currently admin-locked; please try again later", appname); SetFailure(); return; } } if (System.IO.File.Exists(AELight_Dir + @"\execlock.dat")) { bool islocked = true; string[] lines = System.IO.File.ReadAllLines(AELight_Dir + @"\execlock.dat"); int pidLock = int.Parse(lines[0]); { try { System.Diagnostics.Process slaveproc = System.Diagnostics.Process.GetProcessById(pidLock); } catch (System.ArgumentException e) { #if CLIENT_LOG_ALL LogOutputToFile("CLIENT_LOG_ALL: execlock: GetProcessById: ArgumentException: " + e.ToString()); #endif // System.ArgumentException: The process specified by the processId parameter is not running. islocked = false; try { System.IO.File.Delete(AELight_Dir + @"\execlock.dat"); } catch { islocked = true; } } } if (islocked) // If still locked out from this user... { lm.ReleaseMutex(); switch (act) { case "ps": SafePS(); break; case "perfmon": Perfmon.SafeGetCounters(SubArray(args, 1)); break; case "packetsniff": SafePacketSniff(args); break; default: Console.Error.WriteLine(" {0} is currently locked for administrative tasks; please try again later", appname); SetFailure(); break; } return; // ! } } { // Acquire JID. 
if (!System.IO.File.Exists(jidfp)) { jid = 1; sjid = jid.ToString(); } else { string soldjid = System.IO.File.ReadAllText(jidfp).Trim(); long oldjid = long.Parse(soldjid); long newjid = unchecked(oldjid + 1); if (newjid < 1) { newjid = 1; LogOutputToFile(string.Format("Warning: JID overflow: {0} to {1}", soldjid, newjid)); } jid = newjid; sjid = jid.ToString(); } System.IO.File.WriteAllText(jidfp, sjid); } try { string sargs = ""; { StringBuilder sb = new StringBuilder(1000); for (int i = 0; i < args.Length; i++) { if (0 != sb.Length) { sb.Append(' '); } if (-1 != args[i].IndexOf(' ')) { sb.Append('"'); sb.Append(args[i].Replace("\"", "\\\"")); sb.Append('"'); } else { sb.Append(args[i].Replace("\"", "\\\"")); } } sargs = sb.ToString(); } if (string.Compare(args[0], "memcacheinstall", true) == 0) { sargs += " 73045AA6-2F6B-4166-BDE2-806F1E43854B"; } if (-1 != sargs.IndexOf("73045AA6-2F6B-4166-BDE2-806F1E43854B")) { sargs = "\t" + sargs.GetHashCode(); } string sayuser = Environment.UserName; if (null != douser) { sayuser = douser + "(" + sayuser + ")"; } { const int iMAX_SECS_RETRY = 10; // Note: doesn't consider the time spent waiting on I/O. const int ITER_MS_WAIT = 100; // Milliseconds to wait each retry. int iters = iMAX_SECS_RETRY * 1000 / ITER_MS_WAIT; for (; ; ) { try { System.IO.File.AppendAllText(AELight_Dir + @"\execlog.txt", sayuser + " [" + System.DateTime.Now.ToString() + "] " //+ OriginalUserDir + ">" + "@JID#" + sjid + " " + appname + " " + sargs + Environment.NewLine); break; } catch { if (--iters < 0) { throw; } System.Threading.Thread.Sleep(ITER_MS_WAIT); continue; } } } { const int iMAX_SECS_RETRY = 10; // Note: doesn't consider the time spent waiting on I/O. const int ITER_MS_WAIT = 100; // Milliseconds to wait each retry. 
int iters = iMAX_SECS_RETRY * 1000 / ITER_MS_WAIT; for (; ; ) { try { System.IO.File.AppendAllText(AELight_Dir + @"\execq.dat", System.Diagnostics.Process.GetCurrentProcess().Id.ToString() + " " + sjid + " " + act + " +++" + " " + sayuser + " [" + System.DateTime.Now.ToString() + "] " //+ OriginalUserDir + ">" + appname + " " + sargs + Environment.NewLine); break; } catch { if (--iters < 0) { throw; } System.Threading.Thread.Sleep(ITER_MS_WAIT); continue; } } } { int pid = System.Diagnostics.Process.GetCurrentProcess().Id; string spid = pid.ToString(); pidfilename = AELight_Dir + @"\" + spid + ".aelight.pid"; pidfile = new System.IO.StreamWriter(pidfilename); pidfile.WriteLine(spid); pidfile.WriteLine(System.DateTime.Now); pidfile.WriteLine("jid={0}", sjid); pidfile.Flush(); { jidfilename = AELight_Dir + @"\" + sjid + ".jid"; System.IO.StreamWriter sw = new System.IO.StreamWriter(jidfilename); sw.WriteLine(sjid); sw.WriteLine(System.DateTime.Now); sw.WriteLine("pid={0}", spid); //sw.Flush(); sw.Close(); } } } finally { lm.ReleaseMutex(); } } } try { System.Diagnostics.Process.GetCurrentProcess().PriorityClass = System.Diagnostics.ProcessPriorityClass.AboveNormal; } catch { Console.Error.WriteLine("Warning: unable to change priority class of {0} process", appname); } bool bypasshostverify = true; if (args.Length >= 1 && args[0] == "-nv") { bypasshostverify = true; args = SubArray(args, 1); } if (args.Length >= 1 && args[0] == "-v") { bypasshostverify = false; args = SubArray(args, 1); } if (args.Length > 2 && (0 == string.Compare(args[0], "-dfs", StringComparison.OrdinalIgnoreCase) || 0 == string.Compare(args[0], "dfs", StringComparison.OrdinalIgnoreCase)) && args[1].Contains("format")) { bypasshostverify = true; } if (!bypasshostverify) { if (!VerifyHostPermissions()) { Console.Error.WriteLine("Host permissions verification error; aborting"); return; } } switch (act) { case "memcache": MemCacheCommand(SubArray(args, 1)); break; case "enablefilescanner": { dfs dc = 
LoadDfsConfig(); if (dc.FileDaemon == null) { dc.FileDaemon = new dfs.ConfigFileDaemon(); long allchunkcount = 0; foreach (dfs.DfsFile df in dc.Files) { allchunkcount += df.Nodes.Count; } const int OneMin = 1000 * 60; const int ThreeDays = OneMin * 60 * 24 * 3; dc.FileDaemon.ScanChunkSleep = (allchunkcount < 1) ? 0 : (int)(ThreeDays / allchunkcount); if (dc.FileDaemon.ScanChunkSleep < 200) { dc.FileDaemon.ScanChunkSleep = 200; } if (dc.FileDaemon.ScanChunkSleep > OneMin * 60) { dc.FileDaemon.ScanChunkSleep = OneMin * 60; } } dc.FileDaemon.Enabled = true; UpdateDfsXml(dc); Console.WriteLine("File scanner is now enabled"); if (!dc.FileDaemon.AutoRepair) { Console.WriteLine("AutoRepair setting is false; problems found will not be repaired"); } if (dc.Replication < 2) { Console.WriteLine("Note: replication is currently not enabled"); } Console.WriteLine("killall must be issued for file scanner changes to take effect"); } break; case "disablefilescanner": { dfs dc = LoadDfsConfig(); if (dc.FileDaemon != null) { dc.FileDaemon.Enabled = false; UpdateDfsXml(dc); } Console.WriteLine("File scanner is now disabled"); Console.WriteLine("killall must be issued for file scanner changes to take effect"); } break; case "repairlog": { try { Console.Write(System.IO.File.ReadAllText("filerepairlog.txt")); } catch (System.IO.FileNotFoundException) { Console.Error.WriteLine("No repair log file found"); } } break; case "clearrepairlog": System.IO.File.Delete("filerepairlog.txt"); Console.Write('.'); Console.WriteLine(); break; case "hdhistory": { int days = 1; string host = "localhost"; if (args.Length > 1) { days = int.Parse(args[1]); if (args.Length > 2) { host = args[2]; } } try { DateTime cutoff = DateTime.Now.AddDays(-days); string netpath = Surrogate.NetworkPathForHost(host); using (System.IO.StreamReader sr = new System.IO.StreamReader(netpath + @"\harddrive_history.txt")) { for (; ; ) { string ln = sr.ReadLine(); if (null == ln) { break; } string[] parts = ln.Split('|'); if 
(parts.Length > 0) { string part = parts[0].Trim(); string partfind = "Sample Taken: "; if (part.StartsWith(partfind)) { try { string sdt = part.Substring(partfind.Length); DateTime dt = DateTime.Parse(sdt); if (dt > cutoff) { Console.WriteLine(ln); } } catch { } } } } } } catch (System.IO.FileNotFoundException) { } } break; case "dfscheck": case "checkdfs": DfsCheck(SubArray(args, 1)); break; case "dfsfix": case "fixdfs": DfsFix(SubArray(args, 1)); break; case "chkdfs": ChkDfs(SubArray(args, 1)); break; case "rfileview": { if (args.Length <= 1) { Console.Error.WriteLine("Invalid arguments, expected <rfilename>"); SetFailure(); return; } string rfilename = args[1]; string[] hosts; if (args.Length > 2) { if (args[2].StartsWith("@")) { hosts = Surrogate.GetHostsFromFile(args[2].Substring(1)); } else { hosts = args[2].Split(',', ';'); } } else { dfs dc = LoadDfsConfig(); hosts = dc.Slaves.SlaveList.Split(',', ';'); } for (int ihost = 0; ihost < hosts.Length; ihost++) { string host = hosts[ihost]; Console.WriteLine(); if (ihost > 0) { Console.WriteLine("++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++"); //Console.WriteLine(); } try { string netfp = Surrogate.NetworkPathForHost(host) + @"\" + rfilename; if (System.IO.File.Exists(netfp)) { string content = System.IO.File.ReadAllText(netfp); Console.WriteLine("{0}:", host); Console.WriteLine(); Console.WriteLine(content.Trim()); } else { Console.WriteLine("File does not exist on host {0}", host); } } catch (Exception e) { Console.Error.WriteLine("Error from host {0}:", host); Console.Error.WriteLine(e.ToString()); } } } break; case "notifyfinish": { if (args.Length <= 2) { Console.Error.WriteLine("Invalid arguments, expected <jobid> <email>"); SetFailure(); return; } #if DEBUG bool _dbypass = args[1].StartsWith("+"); #endif long WaitOnJID; string JobInfo = null; if (!long.TryParse(args[1], out WaitOnJID) || WaitOnJID < 1) { WaitOnJID = -1; JobInfo = args[1]; } string email = args[2]; string UserMessage = 
null; if (args.Length > 3) { UserMessage = args[3]; } #if DEBUG if (!_dbypass) #endif if (-1 != WaitOnJID) { if (!System.IO.File.Exists(WaitOnJID.ToString() + ".jid")) { Console.WriteLine("JobID {0} is not running", WaitOnJID); return; } } dfs dc = LoadDfsConfig(); if (dc.SMTP == null) { Console.Error.WriteLine("The SMTP server must be set before adding notifications"); Console.Error.WriteLine("Use 'clusterconfigupdate SMTP' to set the SMTP server"); SetFailure(); return; } MySpace.DataMining.DistributedObjects.Scheduler.NotifyInfo.NEntry ne; if (JobInfo != null) { ne = MySpace.DataMining.DistributedObjects.Scheduler.AddNotify(JobInfo, email, douser, UserMessage); } else { ne = MySpace.DataMining.DistributedObjects.Scheduler.AddNotify(WaitOnJID, email, douser, UserMessage); } Console.WriteLine("Notify Identifier: {0}", ne.ID); } break; case "notifykill": case "killnotify": { if (args.Length <= 1) { Console.Error.WriteLine("Error: expected NID"); SetFailure(); return; } string snid = args[1]; long nid; try { nid = long.Parse(snid); if (nid <= 0) { throw new Exception("Must be greater than 0"); } //snid = nid.ToString(); // Normalize. 
} catch (Exception e) { Console.Error.WriteLine("Invalid NID '{0}': {1}", snid, e.Message); SetFailure(); return; } if (!MySpace.DataMining.DistributedObjects.Scheduler.NotifyKill(nid)) { Console.Error.WriteLine("No such NID: {0}", snid); SetFailure(); return; } Console.WriteLine("Done"); } break; case "clearnotify": if (args.Length > 1) { Console.Error.WriteLine("Invalid arguments"); SetFailure(); return; } MySpace.DataMining.DistributedObjects.Scheduler.ClearNotify(); Console.WriteLine("Notifications cleared"); break; case "viewnotify": { IList<MySpace.DataMining.DistributedObjects.Scheduler.NotifyInfo.NEntry> notify = MySpace.DataMining.DistributedObjects.Scheduler.GetNotifySnapshot(); if (0 == notify.Count) { Console.WriteLine(" None"); } else { foreach (MySpace.DataMining.DistributedObjects.Scheduler.NotifyInfo.NEntry ne in notify) { if (-1 == ne.WaitOnJID) { Console.WriteLine(" {0} {1} Waiting on \"{2}\" to e-mail {3}", ne.ID, ne.UserAdded, ne.WaitOnJobInfo, ne.Email); } else { Console.WriteLine(" {0} {1} Waiting on JobID {2} to e-mail {3}", ne.ID, ne.UserAdded, ne.WaitOnJID, ne.Email); } } } } break; case "enqueue": { MySpace.DataMining.DistributedObjects.Scheduler.ScheduleInfo.QEntry qe; try { #if DEBUG //System.Threading.Thread.Sleep(1000 * 8); #endif qe = MySpace.DataMining.DistributedObjects.Scheduler.Enqueue(SubArray(args, 1), douser); } catch (Exception e) { Console.Error.WriteLine(e.Message); SetFailure(); return; } Console.WriteLine("Queue Identifier: {0}", qe.ID); Console.WriteLine("Enqueued: {0}", qe.Command); Console.WriteLine("Position: {0}", MySpace.DataMining.DistributedObjects.Scheduler.GetQueueSnapshot().Count); } break; case "queuekill": { if (args.Length <= 1) { Console.Error.WriteLine("Error: expected QID"); SetFailure(); return; } string sqid = args[1]; long qid; try { qid = long.Parse(sqid); if (qid <= 0) { throw new Exception("Must be greater than 0"); } //sqid = qid.ToString(); // Normalize. 
} catch (Exception e) { Console.Error.WriteLine("Invalid QID '{0}': {1}", sqid, e.Message); SetFailure(); return; } if (!MySpace.DataMining.DistributedObjects.Scheduler.QueueKill(qid)) { Console.Error.WriteLine("No such QID: {0}", sqid); SetFailure(); return; } Console.WriteLine("Done"); } break; case "clearqueue": { MySpace.DataMining.DistributedObjects.Scheduler.ClearQueue(); Console.WriteLine("Queue cleared"); } break; case "pausequeue": case "queuepause": MySpace.DataMining.DistributedObjects.Scheduler.PauseQueue(true); Console.WriteLine("Done"); break; case "unpausequeue": case "queueunpause": MySpace.DataMining.DistributedObjects.Scheduler.PauseQueue(false); Console.WriteLine("Done"); break; case "schedule": { #if DEBUG //System.Threading.Thread.Sleep(1000 * 8); #endif MySpace.DataMining.DistributedObjects.Scheduler.ScheduleInfo.SEntry se; try { se = MySpace.DataMining.DistributedObjects.Scheduler.Schedule(SubArray(args, 1), douser); } catch (Exception e) { Console.Error.WriteLine(e.Message); SetFailure(); return; } Console.WriteLine("Schedule Identifier: {0}", se.ID); Console.WriteLine("Scheduled: {0}", se.Command); Console.WriteLine("First Run: {0}", se.NextRun); #if DEBUG if (!string.IsNullOrEmpty(se.texceptions)) { DateTime dt = DateTime.MaxValue; List<MySpace.DataMining.DistributedObjects.Scheduler.ScheduleInfo.SEntry.TimeSpec.Range> xrs = MySpace.DataMining.DistributedObjects.Scheduler.ScheduleInfo.SEntry.ParseTExceptions( se.texceptions, se.NextRun); foreach (MySpace.DataMining.DistributedObjects.Scheduler.ScheduleInfo.SEntry.TimeSpec.Range xr in xrs) { if (xr.first < dt) { dt = xr.first; } } Console.WriteLine("DEBUG First texception: {0}", dt); } #endif #if DEBUG if (!string.IsNullOrEmpty(se.wtexceptions)) { DateTime dt = DateTime.MaxValue; List<MySpace.DataMining.DistributedObjects.Scheduler.ScheduleInfo.SEntry.TimeSpec.Range> xrs = MySpace.DataMining.DistributedObjects.Scheduler.ScheduleInfo.SEntry.ParseTExceptions( se.wtexceptions, se.NextRun); 
foreach (MySpace.DataMining.DistributedObjects.Scheduler.ScheduleInfo.SEntry.TimeSpec.Range xr in xrs) { if (xr.first < dt) { dt = xr.first; } } Console.WriteLine("DEBUG First wtexception: {0}", dt); } #endif } break; case "pauseschedule": case "schedulepause": { if (args.Length <= 1) { Console.Error.WriteLine("Error: expected SID"); SetFailure(); return; } string ssid = args[1]; long sid; try { sid = long.Parse(ssid); if (sid <= 0) { throw new Exception("Must be greater than 0"); } //ssid = sid.ToString(); // Normalize. } catch (Exception e) { Console.Error.WriteLine("Invalid SID '{0}': {1}", ssid, e.Message); SetFailure(); return; } if (!MySpace.DataMining.DistributedObjects.Scheduler.PauseSchedule(sid, true)) { Console.Error.WriteLine("No such SID: {0}", ssid); SetFailure(); return; } Console.WriteLine("Done"); } break; case "unpauseschedule": case "scheduleunpause": { if (args.Length <= 1) { Console.Error.WriteLine("Error: expected SID"); SetFailure(); return; } string ssid = args[1]; long sid; try { sid = long.Parse(ssid); if (sid <= 0) { throw new Exception("Must be greater than 0"); } //ssid = sid.ToString(); // Normalize. } catch (Exception e) { Console.Error.WriteLine("Invalid SID '{0}': {1}", ssid, e.Message); SetFailure(); return; } if (!MySpace.DataMining.DistributedObjects.Scheduler.PauseSchedule(sid, false)) { Console.Error.WriteLine("No such SID: {0}", ssid); SetFailure(); return; } Console.WriteLine("Done"); } break; case "unschedule": { if (args.Length <= 1) { Console.Error.WriteLine("Error: expected SID"); SetFailure(); return; } string ssid = args[1]; long sid; try { sid = long.Parse(ssid); if (sid <= 0) { throw new Exception("Must be greater than 0"); } //ssid = sid.ToString(); // Normalize. 
} catch (Exception e) { Console.Error.WriteLine("Invalid SID '{0}': {1}", ssid, e.Message); SetFailure(); return; } if (!MySpace.DataMining.DistributedObjects.Scheduler.Unschedule(sid)) { Console.Error.WriteLine("No such SID: {0}", ssid); SetFailure(); return; } Console.WriteLine("Done"); } break; case "clearschedule": { MySpace.DataMining.DistributedObjects.Scheduler.ClearSchedule(); Console.WriteLine("Schedule cleared"); } break; case "viewjob": { int iarg = 1; bool attach = false; if (args.Length > iarg && "-a" == args[iarg]) { attach = true; iarg++; } if (args.Length <= iarg) { Console.Error.WriteLine("Not enough arguments; expected JobID"); } string viewsjid = args[iarg++]; long viewjid = long.Parse(viewsjid); viewsjid = viewjid.ToString(); // Normalize. try { using (System.IO.FileStream stm = new System.IO.FileStream(AELight_Dir + @"\stdout.jid" + viewsjid + ".jso", System.IO.FileMode.Open, System.IO.FileAccess.Read, System.IO.FileShare.ReadWrite)) { using (System.IO.StreamReader sr = new System.IO.StreamReader(stm)) { char[] cbuf = new char[500]; for (; ; ) { int nchars = sr.Read(cbuf, 0, cbuf.Length); if (nchars < 1) { if (!attach || !System.IO.File.Exists(AELight_Dir + @"\" + viewsjid + ".jid")) { break; } System.Threading.Thread.Sleep(1000 * 3); continue; } Console.Write(cbuf, 0, nchars); } ConsoleFlush(); } } } catch (System.IO.FileNotFoundException) { Console.Error.WriteLine("Standard output for JID {0} not found", viewsjid); } } break; case "spread": { #if STDOUT_LOG StdoutLog.Start(); #endif if (args.Length <= 2) { Console.Error.WriteLine("Expected: {0} spread <input-dfsfile> <output-dfsfile>", appname); SetFailure(); return; } string infn = args[1]; if (infn.StartsWith("dfs://", StringComparison.OrdinalIgnoreCase)) { infn = infn.Substring(6); } if (-1 != infn.IndexOf('@')) { Console.Error.WriteLine("Record length not expected: {0}", infn); SetFailure(); return; } string outfn = args[2]; if (outfn.StartsWith("dfs://", 
StringComparison.OrdinalIgnoreCase)) { outfn = outfn.Substring(6); } if (-1 != outfn.IndexOf('@')) { Console.Error.WriteLine("Record length not expected: {0}", outfn); SetFailure(); return; } dfs dc = LoadDfsConfig(); dfs.DfsFile inf = dc.FindAny(infn); if (null == inf) { Console.Error.WriteLine("Input file not found in DFS: {0}", infn); SetFailure(); return; } string dfsinput, dfsoutput; if (inf.RecordLength > 0) { dfsinput = "dfs://" + infn + "@" + inf.RecordLength; dfsoutput = "dfs://" + outfn + "@" + inf.RecordLength; } else { dfsinput = "dfs://" + infn; dfsoutput = "dfs://" + outfn; } if (null != DfsFindAny(dc, outfn)) { Console.Error.WriteLine("Output file already exists in DFS: {0}", outfn); SetFailure(); return; } string tempfnpost = "." + Guid.NewGuid().ToString() + "." + System.Diagnostics.Process.GetCurrentProcess().Id.ToString(); string jobsfn = "spread-jobs.xml" + tempfnpost; try { using (System.IO.StreamWriter sw = System.IO.File.CreateText(jobsfn)) { sw.Write((@"<SourceCode> <Jobs> <Job Name=`spread` Custodian=`` Email=``> <IOSettings> <JobType>mapreduce</JobType> <KeyLength>int</KeyLength> <DFSInput>" + dfsinput + @"</DFSInput> <DFSOutput>" + dfsoutput + @"</DFSOutput> <OutputMethod>grouped</OutputMethod> </IOSettings> <MapReduce> <Map> <![CDATA[ byte[] keybuf = null; ByteSlice keybs; int ikey; public virtual void Map(ByteSlice line, MapOutput output) { if(null == keybuf) { keybuf = new byte[4]; keybs = ByteSlice.Prepare(keybuf); ikey = Qizmt_ProcessID; } Entry.ToBytes(ikey, keybuf, 0); output.Add(keybs, line); ikey = unchecked(ikey + Qizmt_ProcessCount); } ]]> </Map> <Reduce> <![CDATA[ public override void Reduce(ByteSlice key, ByteSliceList values, ReduceOutput output) { while(values.MoveNext()) { output.Add(values.Current); } } ]]> </Reduce> </MapReduce> </Job> </Jobs> </SourceCode> ") .Replace('`', '"')); } Console.WriteLine("Spreading..."); //Exec("", LoadConfig(xpaths, jobsfn), new string[] { }, false, false); Exec("", LoadConfig(jobsfn), new 
string[] { }, false, false); Console.WriteLine(); Console.WriteLine("Successfully spread '{0}' to '{1}'", infn, outfn); } finally { try { System.IO.File.Delete(jobsfn); } catch { } } } break; case "kill": case "killst": case "killmt": { bool singlethreaded = ("killst" == act); if (args.Length > 1) { string killsjid = args[1]; long killjid; try { killjid = long.Parse(killsjid); if (killjid <= 0) { throw new Exception("Must be greater than 0"); } killsjid = killjid.ToString(); // Normalize. } catch (Exception e) { Console.Error.WriteLine("Invalid JID '{0}': {1}", killsjid, e.Message); SetFailure(); return; } if (killjid == jid) { Console.WriteLine("Process suicide"); return; } { bool qverbose = args.Length > 2 && "?" == args[2]; bool dotverbose = args.Length > 2 && "." == args[2]; int killAelightPid = 0; string killAelightSPid = "0"; bool killjidexists = false; using (System.Threading.Mutex killmutex = new System.Threading.Mutex(false, "DOkillj" + killsjid)) { killmutex.WaitOne(); // Can abandon if kill gets killed, but we should be alerted. string killsjidfp = AELight_Dir + @"\" + killsjid + ".jid"; if (!System.IO.File.Exists(killsjidfp)) { //killjidexists = false; } else { killjidexists = true; Console.WriteLine("Killing {0}: {1}", killsjid, ""); dfs dc = LoadDfsConfig(); string[] slaves = dc.Slaves.SlaveList.Split(';'); int numthreads = 1; if (!singlethreaded) { numthreads = slaves.Length; } List<System.Threading.Mutex> mutexes = new List<System.Threading.Mutex>(); bool mutexesNeedSafePoint = true; { mutexes.Add(new System.Threading.Mutex(false, "AEDFSM")); mutexes.Add(new System.Threading.Mutex(false, "DOexeclog")); // Also adding compiler mutex so that the frozen aelight being // killed doesn't prevent other processes from compiling... 
mutexes.Add(new System.Threading.Mutex(false, "DynCmp")); } string[] jidflines = null; try { jidflines = System.IO.File.ReadAllLines(killsjidfp); } catch (System.IO.FileNotFoundException) { // This can happen if the job finishes after the previous file check. killjidexists = false; } if (null != jidflines) { foreach (string ln in jidflines) { if (ln.StartsWith("pid=")) { killAelightSPid = ln.Substring(4); killAelightPid = int.Parse(killAelightSPid); killAelightSPid = killAelightPid.ToString(); // Normalize. try { System.Diagnostics.Process xproc = System.Diagnostics.Process.GetProcessById(killAelightPid); if (mutexesNeedSafePoint) { HogMutexes(true, mutexes); } try { foreach (System.Diagnostics.ProcessThread pt in xproc.Threads) { IntPtr hthd = OpenThread(0x2 /* suspend/resume */, false, (uint)pt.Id); if (IntPtr.Zero == hthd) { throw new Exception("Insufficient access to thread"); } SuspendThread(hthd); #if DEBUG if (qverbose) { lock (slaves) { Console.Write("(suspended thread {0})", pt.Id); ConsoleFlush(); } } #endif } mutexesNeedSafePoint = false; } finally { HogMutexes(false, mutexes); } xproc.Close(); } catch (Exception exf) { if (qverbose) { lock (slaves) { Console.Write("(Unable to suspend AELight threads: {0})", exf.Message); ConsoleFlush(); } } } } } System.Threading.Thread.Sleep(1000); // Allow slaves to initialize. 
{ //foreach (string slave in slaves) MySpace.DataMining.Threading.ThreadTools<string>.Parallel( new Action<string>( delegate(string slave) { string netpath = Surrogate.NetworkPathForHost(slave); foreach (System.IO.FileInfo fi in (new System.IO.DirectoryInfo(netpath)) .GetFiles("*.j" + killsjid + ".slave.pid")) { string spidStopRemote = fi.Name.Substring(0, fi.Name.IndexOf('.')); try { System.Net.Sockets.NetworkStream nstm = Surrogate.ConnectService(slave); nstm.WriteByte((byte)'k'); XContent.SendXContent(nstm, spidStopRemote); if ('+' != nstm.ReadByte()) { throw new Exception("Remote machine did not report a success during kill operation"); } nstm.Close(); fi.Delete(); } catch (Exception e) { LogOutputToFile("Unable to kill Slave PID " + netpath + "\\" + spidStopRemote + " belonging to JID " + killsjid + ": " + e.ToString()); } } }), slaves, numthreads); } System.Threading.Thread.Sleep(1000); // Allow slaves to finalize. string killjzm = "zmap_*_*.j" + killsjid + ".zm"; string killjzb = "zblock_*.j" + killsjid + ".zb"; string killjoblog = "*_????????-????-????-????-????????????.j" + killsjid + "_log.txt"; string killzf = "zfoil_*.j" + killsjid + ".zf"; string killslaveconfig = "slaveconfig.j" + killsjid + ".xml"; //foreach (string slave in slaves) MySpace.DataMining.Threading.ThreadTools<string>.Parallel( new Action<string>( delegate(string slave) { string netpath = Surrogate.NetworkPathForHost(slave); { // Delete leaked chunks only! 
Have to check with DFS.xml Dictionary<string, bool> dcnodes = new Dictionary<string, bool>(StringComparer.OrdinalIgnoreCase); foreach (dfs.DfsFile df in dc.Files) { for (int ifn = 0; ifn < df.Nodes.Count; ifn++) { string nn = df.Nodes[ifn].Name; if (!dcnodes.ContainsKey(nn)) { dcnodes.Add(nn, true); } } } try { string killcheckjzd = "zd.*.*.j" + killsjid + ".zd"; foreach (System.IO.FileInfo fi in (new System.IO.DirectoryInfo(netpath)).GetFiles(killcheckjzd)) { if (!dcnodes.ContainsKey(fi.Name)) { for (int fiDeletes = 0; ; fiDeletes++) { try { fi.Delete(); break; } catch { if (fiDeletes >= 100) { throw; } System.Threading.Thread.Sleep(100); continue; } } if (qverbose) { lock (slaves) { Console.Write("(deleted {0})", fi.FullName); } } if (dotverbose || qverbose) { lock (slaves) { Console.Write('.'); ConsoleFlush(); } } try { string fisamplename = fi.FullName + ".zsa"; System.IO.File.Delete(fisamplename); /*if (dverbose) { lock (slaves) { Console.Write("(deleted {0})", fisamplename); //ConsoleFlush(); } }*/ if (dotverbose || qverbose) { lock (slaves) { Console.Write('.'); ConsoleFlush(); } } } catch { } } } } catch (Exception e) { LogOutput("Unable to delete incomplete MR.DFS data belonging to JID " + killsjid + ": " + e.Message); LogOutputToFile("Unable to delete incomplete MR.DFS data belonging to JID " + killsjid + ": " + e.ToString()); } } try { foreach (System.IO.FileInfo fi in (new System.IO.DirectoryInfo(netpath)).GetFiles(killjzm)) { for (int fiDeletes = 0; ; fiDeletes++) { try { fi.Delete(); break; } catch { if (fiDeletes >= 100) { throw; } System.Threading.Thread.Sleep(100); continue; } } /*if (dverbose) { lock (slaves) { Console.Write("(deleted {0})", fi.FullName); //ConsoleFlush(); } }*/ if (dotverbose || qverbose) { lock (slaves) { Console.Write('.'); ConsoleFlush(); } } } foreach (System.IO.FileInfo fi in (new System.IO.DirectoryInfo(netpath)).GetFiles(killjzb)) { for (int fiDeletes = 0; ; fiDeletes++) { try { fi.Delete(); break; } catch { if (fiDeletes 
>= 100) { throw; } System.Threading.Thread.Sleep(100); continue; } } /*if (dverbose) { lock (slaves) { Console.Write("(deleted {0})", fi.FullName); //ConsoleFlush(); } }*/ if (dotverbose || qverbose) { lock (slaves) { Console.Write('.'); ConsoleFlush(); } } } foreach (System.IO.FileInfo fi in (new System.IO.DirectoryInfo(netpath)).GetFiles(killzf)) { for (int fiDeletes = 0; ; fiDeletes++) { try { fi.Delete(); break; } catch { if (fiDeletes >= 100) { throw; } System.Threading.Thread.Sleep(100); continue; } } /*if (dverbose) { lock (slaves) { Console.Write("(deleted {0})", fi.FullName); //ConsoleFlush(); } }*/ if (dotverbose || qverbose) { lock (slaves) { Console.Write('.'); ConsoleFlush(); } } } } catch (Exception e) { LogOutput("Unable to delete intermediate data belonging to JID " + killsjid + ": " + e.Message); LogOutputToFile("Unable to delete intermediate data belonging to JID " + killsjid + ": " + e.ToString()); } try { foreach (System.IO.FileInfo fi in (new System.IO.DirectoryInfo(netpath)).GetFiles(killjoblog)) { for (int fiDeletes = 0; ; fiDeletes++) { try { fi.Delete(); break; } catch { if (fiDeletes >= 100) { throw; } System.Threading.Thread.Sleep(100); continue; } } /*if (dverbose) { lock (slaves) { Console.Write("(deleted {0})", fi.FullName); //ConsoleFlush(); } }*/ if (dotverbose || qverbose) { lock (slaves) { Console.Write('.'); ConsoleFlush(); } } } } catch (Exception e) { LogOutput("Unable to delete log files belonging to JID " + killsjid + ": " + e.Message); LogOutputToFile("Unable to delete log files belonging to JID " + killsjid + ": " + e.ToString()); } try { System.IO.File.Delete(netpath + @"\" + killslaveconfig); } catch { // This is allowed to fail: the file might not exist. 
} }), slaves, numthreads); } if (0 != killAelightPid) { try { System.Diagnostics.Process killproc = System.Diagnostics.Process.GetProcessById(killAelightPid); if (mutexesNeedSafePoint) { HogMutexes(true, mutexes); } try { killproc.Kill(); killproc.WaitForExit(1000 * 1); mutexesNeedSafePoint = false; } finally { HogMutexes(false, mutexes); } killproc.WaitForExit(1000 * 10); // Can wait longer outside mutexes. killproc.Close(); } catch (Exception e) { LogOutputToFile("Unable to kill Surrogate PID " + killAelightSPid + " belonging to JID " + killsjid + ": " + e.ToString()); } try { System.IO.File.Delete(AELight_Dir + @"\" + killAelightSPid + ".aelight.pid"); } catch { } } System.IO.File.Delete(killsjidfp); } killmutex.ReleaseMutex(); } if (killjidexists) { if (0 != killAelightPid) { if (CleanExecQ(killAelightPid, killjid)) { Console.WriteLine("kill success"); } else { CleanExecQ(0, killjid); // Still clean it from ps. Console.WriteLine("kill warning: Surrogate PID mismatch (ps)"); return; } } else { CleanExecQ(0, killjid); // Still clean it from ps. Console.WriteLine("kill warning: unable to find Surrogate process"); return; } } else { if (CleanExecQ(0, killjid)) { // Not running, but was still cleaned from ps. 
Console.WriteLine("kill warning: JID {0} not running", killsjid); return; } else { Console.Error.WriteLine("kill failure: JID {0} not running", killsjid); SetFailure(); return; } } } } else { Console.Error.WriteLine("Expected JID to kill"); SetFailure(); return; } } break; case "regressiontest": case "regressiontests": RunRegressionTests(SubArray(args, 1)); break; case "recordsize": if (args.Length > 1) { Console.WriteLine("{0}", Surrogate.GetRecordSize(args[1])); } else { Console.Error.WriteLine("Expected user-friendly record size string"); SetFailure(); return; } break; case "setname": { if (args.Length <= 1) { Console.Error.WriteLine("Expected new cluster name"); SetFailure(); return; } else { string newname = args[1]; using (LockDfsMutex()) { dfs dc = LoadDfsConfig(); dc.ClusterName = newname; UpdateDfsXml(dc); } Console.WriteLine("Cluster name set to {0}", newname); } } break; case "viewname": { bool machinereadable = args.Length > 1 && "-m" == args[1]; dfs dc = LoadDfsConfig(); if (dc.ClusterName == null) { Console.Error.WriteLine("Cluster name not set"); SetFailure(); return; } else { if (machinereadable) { Console.WriteLine(dc.ClusterName); } else { Console.WriteLine("Cluster name is {0}", dc.ClusterName); } } } break; case "exec": if (args.Length < 2) { Console.Error.WriteLine("Invalid arguments for " + args[0]); ShowUsage(); } else { #if DEBUG //System.Threading.Thread.Sleep(1000 * 8); #endif try { int iarg = 1; string ExecOpts = ""; List<string> xpaths = null; //bool showjid = false; while (iarg < args.Length) { switch (args[iarg][0]) { case '-': if (0 == string.Compare("-JID", args[iarg], true)) { //showjid = true; } else { ExecOpts += " " + args[iarg].Substring(1); } iarg++; // Important. continue; case '/': if (null == xpaths) { xpaths = new List<string>(); } xpaths.Add(args[iarg]); iarg++; // Important. 
continue; } break; } //if (showjid) { //Console.WriteLine("JID={0}", sjid); Console.WriteLine("Job Identifier: {0}", sjid); } if (iarg >= args.Length) { Console.Error.WriteLine("Invalid arguments for " + args[0] + ": expected jobs file name to execute"); SetFailure(); return; } //#if DEBUG if (args[iarg].StartsWith("file://", StringComparison.OrdinalIgnoreCase)) { CurrentJobFileName = args[iarg].Substring(7); Exec(ExecOpts, LoadConfig(args[iarg].Substring(7)), SubArray(args, iarg + 1), true); return; } //#endif dfs dc = LoadDfsConfig(); dfs.DfsFile dfjob = DfsFind(dc, args[iarg], DfsFileTypes.JOB); if (null == dfjob) { Console.Error.WriteLine("exec jobs file not found in DFS: {0}", args[iarg]); SetFailure(); return; } if (dfjob.Nodes.Count != 1) { throw new Exception("Error: exec jobs file not in correct jobs DFS format"); } CurrentJobFileName = dfjob.Name; string ejnetpath = NetworkPathForHost(dfjob.Nodes[0].Host.Split(';')[0]) + @"\" + dfjob.Nodes[0].Name; if (dc.LogExecHistory > 0) { LogExecHistory(args, ejnetpath, dc.LogExecHistory); } #if STDOUT_LOG StdoutLog.Start(); #endif Exec(ExecOpts, LoadConfig(xpaths, ejnetpath), SubArray(args, iarg + 1), true); } catch (Exception e) { LogOutput(e.ToString()); } } break; case "ghost": case "ghostmt": case "ghostst": { dfs dc = LoadDfsConfig(); string[] hosts = dc.Slaves.SlaveList.Split(';'); bool singlethreaded = act == "ghostst"; int threadcount = singlethreaded ? 1 : hosts.Length; if (threadcount > 15) { threadcount = 15; } Dictionary<string, bool> goodnames = new Dictionary<string, bool>(100); // Lowercase file name key. 
List<System.Text.RegularExpressions.Regex> snowballregexes = new List<System.Text.RegularExpressions.Regex>(); List<string> mappedsamplenames = new List<string>(); foreach (dfs.DfsFile df in dc.Files) { if (0 == string.Compare(df.Type, DfsFileTypes.DELTA, StringComparison.OrdinalIgnoreCase)) { string snowballname = df.Name; string srex = Surrogate.WildcardRegexString(GetSnowballFilesWildcard(snowballname)); System.Text.RegularExpressions.Regex rex = new System.Text.RegularExpressions.Regex(srex, System.Text.RegularExpressions.RegexOptions.IgnoreCase); snowballregexes.Add(rex); string fnms = "zsballsample_" + snowballname + ".zsb"; mappedsamplenames.Add(fnms); } } long nghosts = 0; MySpace.DataMining.Threading.ThreadTools<string>.Parallel( new Action<string>(delegate(string host) //for (int hi = 0; hi < hosts.Length; hi++) { string netpath = Surrogate.NetworkPathForHost(host); { // Clean leaked snowballs... int snowballregexesCount = snowballregexes.Count; foreach (System.IO.FileInfo fi in (new System.IO.DirectoryInfo(netpath)).GetFiles("zsball_*.zsb")) { bool goodsnowball = false; for (int i = 0; i < snowballregexesCount; i++) { if (snowballregexes[i].IsMatch(fi.Name)) { goodsnowball = true; break; } } if (!goodsnowball) { lock (hosts) { nghosts++; Console.WriteLine(" Ghost data file: {0}", fi.Name); } } } int mappedsamplenamesCount = mappedsamplenames.Count; foreach (System.IO.FileInfo fi in (new System.IO.DirectoryInfo(netpath)).GetFiles("zsballsample_*.zsb")) { bool goodmappedsamples = false; for (int i = 0; i < mappedsamplenamesCount; i++) { if (0 == string.Compare(mappedsamplenames[i], fi.Name)) { goodmappedsamples = true; break; } } if (!goodmappedsamples) { lock (hosts) { nghosts++; Console.WriteLine(" Ghost data file: {0}", fi.Name); } } } } } ), hosts, threadcount); Console.WriteLine("Found {0} ghost data files", nghosts); } break; case "restoresurrogate": { // restoresurrogate [-nostop] <metabackup-path> <target-dspace-path> [<new-metabackup-path>] int 
iarg = 1; bool stop = true; while (args.Length > iarg && args[iarg][0] == '-') { string arg = args[iarg++]; switch (arg) { case "-nostop": stop = false; break; default: Console.Error.WriteLine("Unknown switch for restoresurrogate: {0}", arg); SetFailure(); return; } } if (args.Length <= iarg) { Console.Error.WriteLine("Expected <metabackup-path>; not enough arguments for restoresurrogate"); SetFailure(); return; } string metabackuplocation = args[iarg++]; if (args.Length <= iarg) { Console.Error.WriteLine("Expected <target-" + appname + "-path>; not enough arguments for restoresurrogate"); SetFailure(); return; } string targetdspacepath = args[iarg++]; string newmetabackuppath = ""; if (args.Length > iarg) { newmetabackuppath = args[iarg++]; } if (!string.IsNullOrEmpty(newmetabackuppath)) { if (!System.IO.Directory.Exists(newmetabackuppath)) { System.IO.Directory.CreateDirectory(newmetabackuppath); } } string metabackupdfsxmlpath; if (System.IO.Directory.Exists(metabackuplocation)) { string[] xmlfiles = System.IO.Directory.GetFiles(metabackuplocation, "*.xml"); if (xmlfiles.Length > 1) { Console.Error.WriteLine("Error: Too many xml files found in metabackup location; remove all but one and try again: {0}", metabackuplocation); Console.WriteLine("Must be exactly one *.xml file in metabackup location"); SetFailure(); return; } else if (xmlfiles.Length < 1) { Console.Error.WriteLine("Error: {0} not found in metabackup location: {1}", dfs.DFSXMLNAME, metabackuplocation); SetFailure(); return; } else //if (xmlfiles.Length == 1) { metabackupdfsxmlpath = xmlfiles[0]; } } else if (System.IO.File.Exists(metabackuplocation)) { Console.WriteLine("Error: must speicfy directory of metabackup, not file: {0}", metabackuplocation); SetFailure(); return; } else { Console.WriteLine("Error: metabackup directory not found: {0}", metabackuplocation); SetFailure(); return; } string newmaster; if (targetdspacepath.StartsWith(@"\\")) { int ixh = targetdspacepath.IndexOf('\\', 2); if (-1 
== ixh) { Console.Error.WriteLine("Error: problem parsing network from path: {0}", targetdspacepath); SetFailure(); return; } newmaster = targetdspacepath.Substring(2, ixh - 2); } else { //newmaster = System.Net.Dns.GetHostName(); Console.WriteLine("Error: network path required for target dspace directory for surrogate: {0}", targetdspacepath); SetFailure(); return; } Console.WriteLine("Loading metabackup metadata...", metabackupdfsxmlpath); dfs mbdc; try { mbdc = dfs.ReadDfsConfig_unlocked(metabackupdfsxmlpath); } catch (Exception e) { Console.Error.WriteLine("Unable to read metadata from '{0}': {1}", metabackupdfsxmlpath, e.Message); SetFailure(); return; } string[] slaves = mbdc.Slaves.SlaveList.Split(';'); int threadcount = slaves.Length; if (threadcount > 15) { threadcount = 15; } string[] allmachines; { List<string> am = new List<string>(slaves.Length + 1); am.Add(newmaster); // Add surrogate first. for (int si = 0; si < slaves.Length; si++) { // Add slave if it's not the new surrogate. if (0 != string.Compare(IPAddressUtil.GetName(slaves[si]), IPAddressUtil.GetName(newmaster), StringComparison.OrdinalIgnoreCase)) { am.Add(slaves[si]); } } allmachines = am.ToArray(); } Console.WriteLine("Accessing target " + appname + " path {0} ...", targetdspacepath); if (!System.IO.File.Exists(targetdspacepath + @"\aelight.exe")) { Console.Error.WriteLine("Problem accessing target " + appname + " path '{0}': {1}", targetdspacepath, appname + " is not installed at this location"); SetFailure(); return; } try { // Run a little test to verify... string fp = targetdspacepath + "\\restoresurrogate." + Surrogate.SafeTextPath(System.Net.Dns.GetHostName()) + "." 
+ Guid.NewGuid(); System.IO.File.WriteAllText(fp, "[" + DateTime.Now.ToString() + "] restoresurrogate command issued from " + System.Net.Dns.GetHostName() + " {7BCD3A7C-3FA6-466f-84CB-51D70BB2B686}" + Environment.NewLine); if (-1 == System.IO.File.ReadAllText(fp).IndexOf("{7BCD3A7C-3FA6-466f-84CB-51D70BB2B686}")) { System.IO.File.Delete(fp); throw new System.IO.IOException("Read verification error {7BCD3A7C-3FA6-466f-84CB-51D70BB2B686}"); } System.IO.File.Delete(fp); } catch (Exception e) { Console.Error.WriteLine("Problem accessing target " + appname + " path '{0}': {1}", targetdspacepath, e.Message); SetFailure(); return; } // So stopping services doesn't kill this instance. try { MakeInvincible(); } catch { } _CleanPidFile_unlocked(); if (stop) { Console.WriteLine(" Stopping services..."); MySpace.DataMining.Threading.ThreadTools<string>.Parallel( new Action<string>( delegate(string host) { try { Shell("sc \\\\" + host + " stop DistributedObjects"); } catch { } }), allmachines, threadcount); System.Threading.Thread.Sleep(1000 * 3); // Give a bit of extra time to shutdown. 
} { Console.WriteLine(" Restoring surrogate..."); Surrogate.SetNewMasterHost(newmaster); Surrogate.SetNewMetaLocation(targetdspacepath); Console.WriteLine(" Restoring jobs files..."); foreach (System.IO.FileInfo zdfi in (new System.IO.DirectoryInfo(metabackuplocation)).GetFiles("*.zd")) { System.IO.File.Copy(zdfi.FullName, targetdspacepath + @"\" + zdfi.Name, true); } try { string schedulerbackuplocation = newmetabackuppath; if (string.IsNullOrEmpty(schedulerbackuplocation)) { schedulerbackuplocation = null; } if (MySpace.DataMining.DistributedObjects.Scheduler.BackupRestore( metabackuplocation, targetdspacepath, schedulerbackuplocation)) { //Console.WriteLine("Restored scheduled and queued tasks"); } else { //Console.WriteLine("No scheduled or queued tasks to restore"); } } catch (System.IO.FileNotFoundException e) { Console.WriteLine("Warning: unable to restore scheduled and queued tasks, perhaps it was never backed up from before this feature."); Console.WriteLine("Message: {0}", e.Message); } mbdc.MetaBackup = newmetabackuppath; if (!string.IsNullOrEmpty(newmetabackuppath)) { EnsureMetaBackupLocation(mbdc); // Important! Only do this AFTER restoring everything from metabackup location! // Because the user might want to re-use the same directory. foreach (string fn in System.IO.Directory.GetFiles(mbdc.GetMetaBackupLocation())) { System.IO.File.Delete(fn); } } // Save mbdc to targetdspacepath Console.WriteLine(" Restoring metadata..."); try { System.IO.File.Delete(targetdspacepath + @"\dfs.xml"); } catch { } try { System.IO.File.Delete(targetdspacepath + @"\slave.dat"); } catch { } { // Updating slave.dat if found... // If no slave.dat, it's probably a participating surrogate. MySpace.DataMining.Threading.ThreadTools<string>.Parallel( new Action<string>( delegate(string slave) { try { // Delete any dfs.xml found, so this can also work as a move-surrogate feature. 
System.IO.File.Delete(Surrogate.NetworkPathForHost(slave) + @"\dfs.xml"); } catch { } try { string sdfp = Surrogate.NetworkPathForHost(slave) + @"\slave.dat"; if (System.IO.File.Exists(sdfp)) { string[] sd = System.IO.File.ReadAllLines(sdfp); string sdfpnew = sdfp + ".new"; using (System.IO.StreamWriter sw = System.IO.File.CreateText(sdfpnew)) { bool fm = false; for (int i = 0; i < sd.Length; i++) { string line = sd[i]; if (line.StartsWith("master=", StringComparison.OrdinalIgnoreCase)) { line = "master=" + newmaster; fm = true; } sw.WriteLine(line); } if (!fm) { throw new Exception("Invalid slave.dat on " + slave + " - master=host entry not found"); } } System.IO.File.Delete(sdfp); System.IO.File.Move(sdfpnew, sdfp); } else { // If it doesn't exist, write out a new one, but not if it is surrogate. if (0 != string.Compare(IPAddressUtil.GetName(newmaster), IPAddressUtil.GetName(slave), StringComparison.OrdinalIgnoreCase)) { System.IO.File.WriteAllText(sdfp, "master=" + newmaster + Environment.NewLine); } } } catch (Exception e) { lock (slaves) { Console.Error.WriteLine("WARNING: Error on machine {0}: {1}", slave, e.Message); } } }), slaves, threadcount); } { // Fix old surrogate jobs-files references. foreach (dfs.DfsFile df in mbdc.Files) { if (0 == string.Compare(df.Type, DfsFileTypes.JOB, StringComparison.OrdinalIgnoreCase)) { foreach (dfs.DfsFile.FileNode fn in df.Nodes) { fn.Host = newmaster; } } } } // Write new dfs.xml... UpdateDfsXml(mbdc, targetdspacepath + @"\" + dfs.DFSXMLNAME, mbdc.GetMetaBackupLocation()); } if (stop) { Console.WriteLine(" Starting services..."); MySpace.DataMining.Threading.ThreadTools<string>.Parallel( new Action<string>( delegate(string host) { try { Shell("sc \\\\" + host + " start DistributedObjects"); } catch { } }), allmachines, threadcount); System.Threading.Thread.Sleep(1000 * 1); // Give a sec to startup. 
} Console.WriteLine("Done"); if (!string.IsNullOrEmpty(newmetabackuppath)) { Console.WriteLine("Type the following to backup the current meta-data:"); Console.WriteLine(" {0} metabackup -backup-now", appname); } else { Console.WriteLine("Use the metabackup command to re-enable metabackups"); } } break; case "metabackup": try { dfs dc = LoadDfsConfig(); if (args.Length > 1) { EnterAdminCmd(); if (0 == string.Compare("-backup-now", args[1], true) || 0 == string.Compare("-metabackup-now", args[1], true)) { string metabackupdir = dc.GetMetaBackupLocation(); if (null == metabackupdir) { Console.Error.WriteLine("Cannot backup, no meta backup location is set"); SetFailure(); return; } Console.WriteLine("Backing up all meta-data and jobs files..."); //foreach (dfs.DfsFile df in dc.Files) int njobs = 0; MySpace.DataMining.Threading.ThreadTools<dfs.DfsFile>.Parallel( new Action<dfs.DfsFile>( delegate(dfs.DfsFile df) { try { if (0 == string.Compare(DfsFileTypes.JOB, df.Type, StringComparison.OrdinalIgnoreCase)) { bool goodnode = 1 == df.Nodes.Count; string mblfn = goodnode ? 
df.Nodes[0].Name : "<null>"; string mblfp = metabackupdir + @"\" + mblfn; Console.WriteLine(" dfs://{0} -> {1}", df.Name, mblfp); if (!goodnode) { throw new Exception("dfs://" + df.Name + " has invalid data node"); } string mblfpx = mblfp + "$"; System.IO.File.Copy(Surrogate.NetworkPathForHost(df.Nodes[0].Host) + @"\" + df.Nodes[0].Name, mblfpx, true); try { System.IO.File.Delete(mblfp); } catch { } System.IO.File.Move(mblfpx, mblfp); System.Threading.Interlocked.Increment(ref njobs); } } catch (Exception eb) { LogOutputToFile(eb.ToString()); Console.Error.WriteLine(eb.Message); } }), dc.Files, 15); Console.WriteLine("Backed up {0} jobs files", njobs); { MySpace.DataMining.DistributedObjects.Scheduler.SetBackupLocation(metabackupdir); Console.WriteLine("Backed up schedule and queue tasks"); } } else if ("-" == args[1]) { string oldmetabackup = dc.GetMetaBackupLocation(); dc.MetaBackup = null; { UpdateDfsXml(dc); { MySpace.DataMining.DistributedObjects.Scheduler.SetBackupLocation(null); } Console.WriteLine("Setting updated successfully"); Console.WriteLine("Backups will no longer be saved"); Console.WriteLine("Existing backups are still located at: {0}", oldmetabackup); } } else { string oldmetabackup = dc.GetMetaBackupLocation(); string newmetabackup = args[1]; if (!newmetabackup.StartsWith(@"\\")) { newmetabackup = Surrogate.LocalPathToNetworkPath(newmetabackup, Surrogate.MasterHost); } dc.MetaBackup = newmetabackup; { EnsureMetaBackupLocation(dc); // Throws if problem, bailing out before saving change. foreach (string fn in System.IO.Directory.GetFiles(dc.GetMetaBackupLocation())) { System.IO.File.Delete(fn); } UpdateDfsXml(dc); // Only if EnsureMetaBackupLocation was successful! 
{ MySpace.DataMining.DistributedObjects.Scheduler.SetBackupLocation(dc.GetMetaBackupLocation()); } Console.WriteLine("Setting updated successfully"); Console.WriteLine("Type the following to backup the current meta-data:"); Console.WriteLine(" {0} metabackup -backup-now", appname); } } } else { string metabackupdir = dc.GetMetaBackupLocation(); Console.WriteLine("Meta backup location is: {0}", (null == metabackupdir) ? "<null>" : metabackupdir); } } catch (Exception e) { LogOutputToFile("{Metabackup} " + e.ToString()); Console.Error.WriteLine("Metabackup error: {0}", e.Message); SetFailure(); return; } break; case "metadelete": case "metadel": case "metarm": case "removemetafile": if (args.Length < 2) { Console.Error.WriteLine("Invalid arguments for " + args[0]); ShowUsage(); } else { DfsMetaDelete(args[1]); } break; case "metaremovemachine": case "removemetamachine": case "removemetahost": case "removemetanode": case "metaremove": if (args.Length < 2) { Console.Error.WriteLine("Invalid arguments for " + args[0]); ShowUsage(); } else { EnterAdminCmd(); string RMHost = null; bool DontTouchRMHost = false; bool RMForce = false; for (int iarg = 1; iarg < args.Length; iarg++) { if (args[iarg][0] == '-') { switch (args[iarg]) { case "-s": DontTouchRMHost = true; break; case "-f": RMForce = true; break; default: Console.Error.WriteLine("Warning: Unknown switch: {0}", args[iarg]); break; } } else { if (null != RMHost) { Console.Error.WriteLine("Too many hosts specified: {0} and {1}", RMHost, args[iarg]); SetFailure(); return; } RMHost = args[iarg]; } } MetaRemoveMachine(RMHost, DontTouchRMHost, RMForce); } break; case "slavelogfind": { if (args.Length > 1) { string what = args[1]; dfs dc = LoadDfsConfig(); string[] slaves = dc.Slaves.SlaveList.Split(';'); MySpace.DataMining.Threading.ThreadTools<string>.Parallel( new Action<string>( delegate(string slave) { try { string netdir = Surrogate.NetworkPathForHost(slave); System.IO.FileInfo fi = new System.IO.FileInfo(netdir + 
@"\slave-log.txt"); if (fi.Exists) { long lastmatchline = -1; string lastmatchstring = null; string line; long curline = 0; using (System.IO.StreamReader sr = fi.OpenText()) { while (null != (line = sr.ReadLine())) { curline++; if (-1 != line.IndexOf(what, StringComparison.OrdinalIgnoreCase)) { lastmatchline = curline; lastmatchstring = line; } } } if (-1 != lastmatchline) { lock (slaves) { Console.WriteLine("{0}({1}): {2}", fi.FullName, lastmatchline, lastmatchstring); } } } } catch (Exception e) { lock (slaves) { Console.Error.WriteLine("Error with {0}: {1}", slave, e.Message); } } } ), slaves, slaves.Length); } else { Console.Error.WriteLine("String to find expected"); SetFailure(); return; } } Console.WriteLine("Done"); break; case "slaveloglargest": { dfs dc = LoadDfsConfig(); string[] slaves = dc.Slaves.SlaveList.Split(';'); long largestsize = -1; string largestpath = null; MySpace.DataMining.Threading.ThreadTools<string>.Parallel( new Action<string>( delegate(string slave) { try { string netdir = Surrogate.NetworkPathForHost(slave); System.IO.FileInfo fi = new System.IO.FileInfo(netdir + @"\slave-log.txt"); if (fi.Exists) { long sz = fi.Length; lock (slaves) { if (sz > largestsize) { largestpath = fi.FullName; largestsize = sz; } } } } catch (Exception e) { lock (slaves) { Console.Error.WriteLine("Error with {0}: {1}", slave, e.Message); } } } ), slaves, slaves.Length); if (-1 == largestsize) { Console.Error.WriteLine("None found"); } else { Console.WriteLine("{0} contains the largest slave log at {1} ({2} B)", largestpath, Surrogate.GetFriendlyByteSize(largestsize), largestsize); } } break; case "slavelogdelete": { dfs dc = LoadDfsConfig(); string[] slaves = dc.Slaves.SlaveList.Split(';'); MySpace.DataMining.Threading.ThreadTools<string>.Parallel( new Action<string>( delegate(string slave) { try { string netdir = Surrogate.NetworkPathForHost(slave); System.IO.File.Delete(netdir + @"\slave-log.txt"); lock (slaves) { Console.Write('.'); } } catch (Exception 
e) { } } ), slaves, slaves.Length); Console.WriteLine(); Console.WriteLine("Done"); } break; case "clearlogs": { dfs dc = LoadDfsConfig(); string[] slaves = dc.Slaves.SlaveList.Split(';'); const int MAX_TRIES = 10; List<string> errs = new List<string>(slaves.Length); MySpace.DataMining.Threading.ThreadTools<string>.Parallel( new Action<string>( delegate(string slave) { int triesremain = MAX_TRIES; string fn = Surrogate.NetworkPathForHost(slave) + @"\slave-log.txt"; for (; ; ) { try { System.IO.File.Delete(fn); lock (slaves) { Console.Write('.'); } return; } catch (Exception e) { if (--triesremain <= 0) { lock (slaves) { errs.Add(slave); } break; } } } } ), slaves, slaves.Length); Console.WriteLine(); if (errs.Count > 0) { Console.WriteLine("Errors encountered while trying to clear logs from these machines:"); foreach (string e in errs) { Console.WriteLine(e); } } else { Console.WriteLine("Done"); } } break; case "slaveloglist": { dfs dc = LoadDfsConfig(); string[] slaves = dc.Slaves.SlaveList.Split(';'); List<string> list = new List<string>(slaves.Length); MySpace.DataMining.Threading.ThreadTools<string>.Parallel( new Action<string>( delegate(string slave) { try { string netdir = Surrogate.NetworkPathForHost(slave); System.IO.FileInfo fi = new System.IO.FileInfo(netdir + @"\slave-log.txt"); if (fi.Exists) { lock (slaves) { list.Add(fi.Length.ToString().PadLeft(8) + " B " + fi.FullName); } } } catch (Exception e) { } } ), slaves, slaves.Length); list.Sort(); foreach (string x in list) { Console.WriteLine(x); } } break; case "viewlog": case "viewlogs": { int maxentries = 1000; string machine = null; string findsjid = null; string findjobfile = null; if (args.Length > 1) { for (int i = 1; i < args.Length; i++) { string arg = args[i]; string optval = ""; string optname = ""; int del = arg.IndexOf("="); if (del > -1) { optname = arg.Substring(0, del).ToLower(); optval = arg.Substring(del + 1); } switch (optname) { case "machine": machine = optval; break; case "count": 
try { int _max = Int32.Parse(optval); if (_max > 0) { maxentries = _max; } } catch { } break; case "jobid": case "jid": case "jobidentifier": { findsjid = optval; long findjid; if (!long.TryParse(findsjid, out findjid) || findjid < 1) { Console.Error.WriteLine("Invalid JobID: " + findsjid); return; } } break; case "jobfile": case "jobsfile": findjobfile = optval; break; default: Console.Error.WriteLine("Invalid argument for viewlogs"); return; } } } string[] hosts = null; if (machine == null) { dfs dc = LoadDfsConfig(); hosts = dc.Slaves.SlaveList.Split(';'); } else { hosts = new string[1] { machine }; } List<string> logpaths = new List<string>(); MySpace.DataMining.Threading.ThreadTools<string>.Parallel( new Action<string>( delegate(string host) { string fn = Surrogate.NetworkPathForHost(host) + @"\slave-log.txt"; if (System.IO.File.Exists(fn)) { lock (logpaths) { logpaths.Add(fn); } } }), hosts, hosts.Length); if (logpaths.Count == 0) { Console.Error.WriteLine("No log file is found."); return; } const int MAXBYTE = 1024 * 1024 * 64; int maxbytepart = MAXBYTE / logpaths.Count; int maxentriespart = maxentries / logpaths.Count; if (maxentries % logpaths.Count != 0) { maxentriespart++; } List<string[]> allentries = new List<string[]>(logpaths.Count); MySpace.DataMining.Threading.ThreadTools<string>.Parallel( new Action<string>( delegate(string logpath) { if (!System.IO.File.Exists(logpath)) { return; } string token = "----------------------------------------------------------------" + Environment.NewLine + Environment.NewLine; System.IO.FileStream fs = null; try { fs = new System.IO.FileStream(logpath, System.IO.FileMode.Open, System.IO.FileAccess.Read, System.IO.FileShare.ReadWrite); if (fs.Length > maxbytepart * 2) { fs.Position = fs.Length - maxbytepart; } int ib = 0; List<long> idx = new List<long>(); long entryStart = 0; while ((ib = fs.ReadByte()) > -1) { if (ib == (int)token[0]) { bool istoken = true; for (int i = 1; i < token.Length; i++) { if (fs.ReadByte() 
!= (int)token[i]) { istoken = false; break; } } if (istoken) { idx.Add(entryStart); entryStart = fs.Position; } } } if (idx.Count == 0) { return; } long flen = fs.Length; int startidx = idx.Count > maxentriespart ? idx.Count - maxentriespart : 0; long offset = idx[startidx]; long buflen = flen - offset; while (buflen > maxbytepart && startidx < idx.Count - 1) { startidx++; offset = idx[startidx]; buflen = flen - offset; } if (buflen > maxbytepart) { throw new Exception("log too large"); } byte[] buf = new byte[buflen]; fs.Position = offset; fs.Read(buf, 0, buf.Length); fs.Close(); fs = null; string[] entries = new string[idx.Count - startidx]; for (int i = startidx; i < idx.Count; i++) { int pos = (int)(idx[i] - offset); int bytecount = 0; if (i < idx.Count - 1) { bytecount = (int)(idx[i + 1] - offset - pos); } else { bytecount = buf.Length - pos; } entries[i - startidx] = System.Text.Encoding.ASCII.GetString(buf, pos, bytecount); } lock (allentries) { allentries.Add(entries); } } catch { if (fs != null) { fs.Close(); fs = null; } throw; } } ), logpaths, logpaths.Count); if (allentries.Count == 0) { Console.Error.WriteLine("No log entries found."); return; } Console.WriteLine("-"); Console.WriteLine("Log entries:"); Console.WriteLine("-"); { List<KeyValuePair<DateTime, string>> list = new List<KeyValuePair<DateTime, string>>(); foreach (string[] entries in allentries) { foreach (string entry in entries) { int del = entry.IndexOf('M'); //AM or PM string sdate = entry.Substring(1, del); try { DateTime dt = DateTime.Parse(sdate); list.Add(new KeyValuePair<DateTime, string>(dt, entry)); } catch { } } } list.Sort(delegate(KeyValuePair<DateTime, string> x, KeyValuePair<DateTime, string> y) { return x.Key.CompareTo(y.Key); }); int start = list.Count > maxentries ? 
list.Count - maxentries : 0; int dispcount = 0; for (int i = start; i < list.Count; i++) { string log = list[i].Value; if (null != findsjid | null != findjobfile) { string logsjid; string logjobdesc; { const string JSTART = "[JobID:"; int ij = log.LastIndexOf(JSTART); if (-1 == ij) { continue; } ij += JSTART.Length; const string JEND = "] "; int ijend = log.IndexOf(JEND, ij); if (-1 == ijend) { continue; } logsjid = log.Substring(ij, ijend - ij); logjobdesc = log.Substring(ijend + JEND.Length); } if (null != findsjid) { if (logsjid != findsjid) { continue; } } if (null != findjobfile) { if (findjobfile.Length >= logjobdesc.Length || ' ' != logjobdesc[findjobfile.Length]) { continue; } if (!logjobdesc.StartsWith(findjobfile, true, null)) { continue; } } } Console.Write(log); dispcount++; } Console.WriteLine("-"); Console.WriteLine("Entries displayed: {0}", dispcount); Console.WriteLine("-"); } } break; case "xhealth": { // DFS sanity check... dfs dc = LoadDfsConfig(); Dictionary<string, bool> dd = new Dictionary<string, bool>(new Surrogate.CaseInsensitiveEqualityComparer()); foreach (dfs.DfsFile df in dc.Files) { if (dd.ContainsKey(df.Name)) { Console.Error.WriteLine("Error: duplicate file '{0}' detected in DFS; this file should be deleted or xrepair", df.Name); SetFailure(); return; } dd.Add(df.Name, true); } } Console.WriteLine("Done"); break; case "xrepair": { // DFS sanity check... 
// xrepair body: repeatedly scan the DFS metadata for duplicate file names and
// delete one duplicate per pass.  The outer loop restarts after each delete
// (run = true) because DfsDelete changes the file list, so dc.Files is
// reloaded fresh on every pass.
for (bool run = true; run; )
{
    run = false;
    dfs dc = LoadDfsConfig();
    // Case-insensitive set of file names already seen in this pass.
    Dictionary<string, bool> dd = new Dictionary<string, bool>(new Surrogate.CaseInsensitiveEqualityComparer());
    foreach (dfs.DfsFile df in dc.Files)
    {
        if (dd.ContainsKey(df.Name))
        {
            // Duplicate name found: delete it and rescan from a fresh config.
            Console.WriteLine("Deleting '{0}'", df.Name);
            DfsDelete(df.Name);
            run = true;
            break;
        }
        dd.Add(df.Name, true);
    }
}
}
Console.WriteLine("Done");
break;

case "nearprime":
    // Reports whether args[1] is prime and prints the nearest primes below
    // and above it (IsPrime / NearestPrimeLE / NearestPrimeGE are helpers
    // defined elsewhere in this file).
    if (args.Length > 1)
    {
        int x = int.Parse(args[1]);
        if (x <= 0)
        {
            Console.Error.WriteLine("Please enter a positive number");
        }
        else
        {
            Console.WriteLine("{0} is {1}prime", x, IsPrime(x) ? "" : "not ");
            if (x > 2)
            {
                Console.WriteLine("{0} is nearest prime less than {1}", NearestPrimeLE(x - 1), x);
            }
            Console.WriteLine("{0} is nearest prime greater than {1}", NearestPrimeGE(x + 1), x);
        }
    }
    else
    {
        Console.Error.WriteLine("What number?");
    }
    break;

// Obsolete, use servicestatusall...
case "status":
    {
        // Hosts to query: explicit ;- or ,-separated list from args[1],
        // otherwise every slave from the DFS config.
        string[] hosts;
        if (args.Length > 1)
        {
            hosts = args[1].Split(';', ',');
        }
        else
        {
            dfs dc = LoadDfsConfig();
            hosts = dc.Slaves.SlaveList.Split(';');
        }
        // Cap concurrent service queries at 15 worker threads.
        int threadcount = hosts.Length;
        if (threadcount > 15)
        {
            threadcount = 15;
        }
        MySpace.DataMining.Threading.ThreadTools<string>.Parallel(
            new Action<string>(
            delegate(string host)
            {
                // Query the DistributedObjects Windows service via sc.exe and
                // parse its output for the STATE line; anything other than
                // RUNNING is reported as a warning.
                bool found = false;
                try
                {
                    string[] exlines = Shell("sc \\\\" + host + " query DistributedObjects").Split('\n');
                    for (int iex = 0; iex < exlines.Length; iex++)
                    {
                        var x = exlines[iex].Trim();
                        if (x.Length > 6 && x.Substring(0, 6) == "STATE ")
                        {
                            // Keep only the word after the last space of the
                            // STATE line (e.g. "RUNNING" from ": 4  RUNNING").
                            var state = x.Substring(6);
                            int ils = state.LastIndexOf(' ');
                            if (-1 != ils)
                            {
                                state = state.Substring(ils + 1);
                            }
                            //state = state.Replace(" ", "");
                            //state = state.Trim();
                            if ("RUNNING" == state)
                            {
                                // lock(hosts) serializes console output across
                                // the parallel worker threads.
                                lock (hosts)
                                {
                                    Console.WriteLine(host + ": " + state);
                                }
                            }
                            else
                            {
                                lock (hosts)
                                {
                                    Console.WriteLine(host + ": " + state + " *** WARNING ***");
                                }
                            }
                            found = true;
                        }
                    }
                }
                catch
                {
                    // Best-effort: any failure (unreachable host, sc error)
                    // falls through to the FAILED report below.
                }
                if (!found)
                {
                    lock (hosts)
                    {
                        Console.WriteLine(host + ": FAILED *** ERROR ***");
                    }
                }
            }
            ), hosts, threadcount);
    }
    break;

case "callstack":
if (args.Length <= 1) { Console.Error.WriteLine("Invalid syntax for command: callstack: not enough arguments"); SetFailure(); return; } if (0 == string.Compare(args[1], "worker", true) || 0 == string.Compare(args[1], "workers", true)) { if (args.Length <= 3) { Console.Error.WriteLine("Invalid syntax for command: callstack worker: not enough arguments"); SetFailure(); return; } string sjidcs = args[2]; long jidcs = -1; #if DEBUG if ("*" != sjidcs) #endif { if (!long.TryParse(sjidcs, out jidcs) || jidcs < 0) { Console.Error.WriteLine("callstack: invalid Job Identifier: " + sjidcs); SetFailure(); return; } sjidcs = jidcs.ToString(); // Normalize. } string hostcs = args[3]; { string netpath = Surrogate.NetworkPathForHost(hostcs); string[] fps = System.IO.Directory.GetFiles(netpath, "*.j" + sjidcs + ".slave.pid"); if (0 == fps.Length) { Console.Error.WriteLine("No workers for Job Identifier {0} found on host {1}", sjidcs, hostcs); } else { System.Text.RegularExpressions.Regex rex = new System.Text.RegularExpressions.Regex(@"\\(\d+)\.j(\d+).slave.pid$", System.Text.RegularExpressions.RegexOptions.Compiled | System.Text.RegularExpressions.RegexOptions.Singleline | System.Text.RegularExpressions.RegexOptions.IgnoreCase); List<KeyValuePair<string, string>> slavecs = new List<KeyValuePair<string, string>>(fps.Length); // Key=slave; Value=jid for (int i = 0; i < fps.Length; i++) { System.Text.RegularExpressions.Match m = rex.Match(fps[i]); if (!m.Success) { throw new Exception("Internal error: slave pid file mismatch"); } System.Text.RegularExpressions.GroupCollection gc = m.Groups; slavecs.Add(new KeyValuePair<string, string>(gc[1].Value, gc[2].Value)); } Console.WriteLine("Waiting on {0} worker callstack{1}...", slavecs.Count, slavecs.Count == 1 ? 
"" : "s"); for (int i = 0; i < slavecs.Count; i++) { string path = netpath + @"\" + slavecs[i].Key + ".trace"; for (; System.IO.File.Exists(path); System.Threading.Thread.Sleep(1000 * 1)) { } string tpath = sjid + "tracing.slave" + slavecs[i].Key + ".tof"; System.IO.File.WriteAllText(path, tpath + Environment.NewLine + "."); } for (int tries = 0; slavecs.Count > 0; tries++) { if (0 != tries) { System.Threading.Thread.Sleep(1000 * 3); } for (int i = 0; i < slavecs.Count; i++) { string tpath = netpath + @"\" + sjid + "tracing.slave" + slavecs[i].Key + ".tof"; { string toutput = ReadTraceFromFile(tpath); if (null == toutput) { if (0 == System.IO.Directory.GetFiles(netpath, slavecs[i].Key + ".j*.slave.pid").Length) { Console.WriteLine(); Console.WriteLine("Worker no longer running"); try { System.IO.File.Delete(netpath + @"\" + slavecs[i].Key + ".trace"); } catch { } slavecs.RemoveAt(i); i--; #if DEBUG //System.Diagnostics.Debugger.Launch(); #endif } } else { Console.WriteLine(); Console.WriteLine(toutput); try { System.IO.File.Delete(tpath); } catch { } slavecs.RemoveAt(i); i--; } } } } Console.WriteLine(); } } } else if (0 == string.Compare(args[1], "surrogate", true)) { if (args.Length <= 2) { Console.Error.WriteLine("Invalid syntax for command: callstack surrogate: not enough arguments"); SetFailure(); return; } string sjidcs = args[2]; long jidcs; if (!long.TryParse(sjidcs, out jidcs) || jidcs < 0) { Console.Error.WriteLine("callstack: invalid Job Identifier: " + sjidcs); SetFailure(); return; } sjidcs = jidcs.ToString(); // Normalize. 
// callstack surrogate: locate the aelight process id for the given Job
// Identifier by reading the surrogate's <jid>.jid file, so that a trace
// request can be issued to that process.  Runs against the local host.
string hostcs = System.Net.Dns.GetHostName();
{
    string netpath = Surrogate.NetworkPathForHost(hostcs);
    string jidcsfp = netpath + @"\" + sjidcs + ".jid";
    string saelightpid = null;
    int aelightpid = -1;
    // Poll the jid file every 3 seconds until a complete pid= entry is read.
    for (; ; System.Threading.Thread.Sleep(1000 * 3))
    {
        try
        {
            string jidcscontent;
            // Open with permissive sharing: the job process may still be
            // writing to (or deleting) this file while we read it.
            using (System.IO.FileStream f = new System.IO.FileStream(jidcsfp, System.IO.FileMode.Open, System.IO.FileAccess.Read,
                System.IO.FileShare.Read | System.IO.FileShare.Write | System.IO.FileShare.Delete))
            {
                System.IO.StreamReader sr = new System.IO.StreamReader(f);
                jidcscontent = sr.ReadToEnd();
                sr.Close();
            }
            {
                // If any of this fails, try again;
                // it might not be written fully yet.
                int ipidequ = 0;
                for (; ; )
                {
                    ipidequ = jidcscontent.IndexOf("pid=", ipidequ);
                    if (-1 == ipidequ)
                    {
                        break;
                    }
                    // Only accept "pid=" at the start of the file or at the
                    // start of a line, not embedded in other content.
                    if (0 == ipidequ || '\n' == jidcscontent[ipidequ - 1])
                    {
                        // Ensure newline to ensure the pid= entry was fully written.
                        int iendpid = jidcscontent.IndexOf('\n', ipidequ + 4);
                        if (-1 != iendpid)
                        {
                            saelightpid = jidcscontent.Substring(ipidequ + 4, iendpid - (ipidequ + 4)).Trim();
                            // Round-trip through int.Parse to validate and
                            // canonicalize the pid string.
                            aelightpid = int.Parse(saelightpid);
                            saelightpid = aelightpid.ToString(); // Normalize.
break; } else { //ipidequ += 4; //continue; ipidequ = -1; break; } } else { ipidequ += 4; continue; } } if (-1 == ipidequ) { continue; } } } catch (System.IO.FileNotFoundException) { } catch { continue; } break; } if (null == saelightpid) { Console.Error.WriteLine("No surrogate process for Job Identifier {0}", sjidcs); } else { Console.WriteLine("Waiting on surrogate callstacks..."); { string path = netpath + @"\" + saelightpid + ".trace"; for (; System.IO.File.Exists(path); System.Threading.Thread.Sleep(1000 * 1)) { } string tpath = sjid + "tracing.aelight" + saelightpid + ".tof"; System.IO.File.WriteAllText(path, tpath + Environment.NewLine + "."); } for (int tries = 0; ; tries++) { if (0 != tries) { System.Threading.Thread.Sleep(1000 * 3); } { string tpath = netpath + @"\" + sjid + "tracing.aelight" + saelightpid + ".tof"; { string toutput = ReadTraceFromFile(tpath); if (null == toutput) { if (!System.IO.File.Exists(jidcsfp)) { Console.WriteLine(); Console.WriteLine("Surrogate process no longer running"); try { System.IO.File.Delete(netpath + @"\" + saelightpid + ".trace"); } catch { } break; } } else { Console.WriteLine(); Console.WriteLine(toutput); try { System.IO.File.Delete(tpath); } catch { } break; } } } } Console.WriteLine(); } } } else if (0 == string.Compare(args[1], "driver", true)) { if (args.Length <= 2) { Console.Error.WriteLine("Invalid syntax for command: callstack worker: not enough arguments"); SetFailure(); return; } string hostcs = args[2]; { string netpath = Surrogate.NetworkPathForHost(hostcs); { int driverpid; string sdriverpid; //Console.Error.WriteLine("No surrogate process for Job Identifier {0}", sjidcs); for (; ; System.Threading.Thread.Sleep(1000 * 1)) { try { string driverfp = netpath + @"\driver.pid"; string driverfcontent; using (System.IO.FileStream f = new System.IO.FileStream(driverfp, System.IO.FileMode.Open, System.IO.FileAccess.Read, System.IO.FileShare.Read | System.IO.FileShare.Write | System.IO.FileShare.Delete)) { 
System.IO.StreamReader sr = new System.IO.StreamReader(f); driverfcontent = sr.ReadToEnd(); sr.Close(); } { int inl = driverfcontent.IndexOf('\n'); // Continue if no \n. if (-1 != inl) { sdriverpid = driverfcontent.Substring(0, inl).Trim(); break; } } } catch (System.IO.FileNotFoundException) { Console.Error.WriteLine("No driver process for host {0}", hostcs); SetFailure(); return; } } driverpid = int.Parse(sdriverpid); sdriverpid = driverpid.ToString(); // Normalize. Console.WriteLine("Waiting on driver callstack..."); { string path = netpath + @"\" + sdriverpid + ".trace"; for (; System.IO.File.Exists(path); System.Threading.Thread.Sleep(1000 * 1)) { } string tpath = sjid + "tracing.driver" + sdriverpid + ".tof"; System.IO.File.WriteAllText(path, tpath + Environment.NewLine + "."); } for (int tries = 0; ; tries++) { if (0 != tries) { System.Threading.Thread.Sleep(1000 * 3); } { string tpath = netpath + @"\" + sjid + "tracing.driver" + sdriverpid + ".tof"; { string toutput = ReadTraceFromFile(tpath); if (null == toutput) { if (!System.IO.File.Exists(netpath + @"\" + sdriverpid + ".trace")) { Console.WriteLine(); Console.WriteLine("Driver no longer running"); try { System.IO.File.Delete(netpath + @"\" + sdriverpid + ".trace"); } catch { } break; #if DEBUG //System.Diagnostics.Debugger.Launch(); #endif } } else { Console.WriteLine(); Console.WriteLine(toutput); try { System.IO.File.Delete(tpath); } catch { } break; } } } } Console.WriteLine(); } } } else { Console.Error.WriteLine("Invalid syntax for command: callstack: didn't expect " + args[1]); SetFailure(); return; } break; case "healthst": case "healthmt": case "health": { dfs dc = LoadDfsConfig(); bool all = false; bool verify = false; string[] hosts = null; bool plugininfo = false; bool mt = "healthst" != act; if (args.Length >= 2) { for (int i = 1; i < args.Length; i++) { switch (args[i]) { case "-a": all = true; break; case "-v": verify = true; break; case "-mt": mt = true; break; #if DEBUG case "-pi": 
plugininfo = true;
break;
#endif
default:
    {
        // Positional argument: host list, either @file containing hosts
        // or a ;/,-separated list given directly.
        string shosts = args[i];
        if (shosts.StartsWith("@"))
        {
            hosts = Surrogate.GetHostsFromFile(shosts.Substring(1));
        }
        else
        {
            hosts = shosts.Split(';', ',');
        }
    }
    break;
} } }
// Health plugins: DFS files named QizmtHealth*.dll, each expected to expose
// a CheckHealth method matching Surrogate.HealthMethod; collected as
// (display-name, delegate) pairs for use by the health checks below.
List<KeyValuePair<string, Surrogate.HealthMethod>> plugins = new List<KeyValuePair<string, Surrogate.HealthMethod>>();
try
{
    string cacdir = null;
    List<dfs.DfsFile> healthdlls = dc.FindAll("QizmtHealth*.dll");
    if (plugininfo)
    {
        Console.WriteLine("*PluginInfo: Found {0} matching plugin DLLs in DFS", healthdlls.Count);
    }
    foreach (dfs.DfsFile healthplugin in healthdlls)
    {
        // Resolve the CAC (plugin DLL) directory once, on first plugin.
        if (null == cacdir)
        {
#if HEALTHPLUGIN_FINDCAC
            // Probe each slave for a reachable CAC directory; each probe runs
            // on its own thread with a 30-second timeout.  The delegate
            // captures cacdir, which the probe thread assigns on success.
            foreach (string cdh in dc.Slaves.SlaveList.Split(',', ';'))
            {
                System.Threading.Thread cdthd = new System.Threading.Thread(
                    new System.Threading.ThreadStart(
                    delegate()
                    {
                        if (Surrogate.IsHealthySlaveMachine(cdh))
                        {
                            string cddir = Surrogate.NetworkPathForHost(cdh) + @"\" + dfs.DLL_DIR_NAME;
                            if (System.IO.Directory.Exists(cddir))
                            {
                                cacdir = cddir;
                            }
                        }
                    }));
                cdthd.Start();
                cdthd.Join(1000 * 30);
                if (null != cacdir)
                {
                    break;
                }
            }
#else
            // Needs participating surrogate.
string cddir = AELight_Dir + @"\" + dfs.DLL_DIR_NAME; if (System.IO.Directory.Exists(cddir)) { cacdir = cddir; } if (null == cacdir) { throw new Exception("Unable to locate CAC directory on surrogate (must be participating surrogate for health plugins)"); } #endif } if (null == cacdir) { throw new Exception("Unable to locate healthy CAC directory"); } if (plugininfo) { Console.WriteLine("*PluginInfo: Found CAC dir at: {0}", cacdir); } bool foundhealthmethod = false; try { System.Reflection.Assembly hasm = System.Reflection.Assembly.LoadFrom(cacdir + @"\" + healthplugin.Name); foreach (Type t in hasm.GetTypes()) { if (-1 != t.Name.IndexOf("Health", StringComparison.OrdinalIgnoreCase)) { System.Reflection.MethodInfo mi = t.GetMethod("CheckHealth", System.Reflection.BindingFlags.Public | System.Reflection.BindingFlags.Static); if (null != mi) { Surrogate.HealthMethod hm = (Surrogate.HealthMethod)Delegate.CreateDelegate(typeof(Surrogate.HealthMethod), mi); plugins.Add(new KeyValuePair<string, Surrogate.HealthMethod>(healthplugin.Name + " " + t.Name, hm)); foundhealthmethod = true; if (plugininfo) { Console.WriteLine("*PluginInfo: CheckHealth method found: {0} {1}", healthplugin.Name, t.Name); } } } } if (!foundhealthmethod) { throw new Exception("Did not find a Health public class with CheckHealth public static method (HealthMethod)"); } } catch (Exception epl) { throw new Exception("Unable to use plugin " + healthplugin.Name + ": " + epl.Message, epl); } } } catch (Exception e) { Console.Error.WriteLine("Health plugin error: " + e.Message); } if (null == hosts) { hosts = dc.Slaves.SlaveList.Split(';'); } #if DEBUG //System.Threading.Thread.Sleep(1000 * 8); #endif int nthreads = 1; if (mt) { nthreads = hosts.Length; if (nthreads > 15) { nthreads = 15; } } { if (all) { Console.WriteLine("[Machines Health]"); } int badones = 0; //for (int si = 0; si < hosts.Length; si++) MySpace.DataMining.Threading.ThreadTools<string>.Parallel( new Action<string>( delegate(string host) 
{ //string host = hosts[si]; string reason = null; { System.Threading.Thread thd = new System.Threading.Thread( new System.Threading.ThreadStart( delegate() { if (!all) // Only do this here if not -a because it'll be done later in more detail. { foreach (KeyValuePair<string, Surrogate.HealthMethod> plugin in plugins) { Surrogate.HealthMethod hm = plugin.Value; if (Surrogate.SafeCallHealthMethod(hm, host, null, out reason)) { reason = null; } else { badones++; break; } } } if (null == reason) { if (Surrogate.IsHealthySlaveMachine(host, out reason)) { reason = null; } else { System.Threading.Interlocked.Increment(ref badones); } } })); thd.IsBackground = true; thd.Start(); const int healthtimeoutsecs = 30; if (!thd.Join(1000 * healthtimeoutsecs)) { try { thd.Abort(); } catch { } reason = "Timed out (" + healthtimeoutsecs + " seconds)"; } } if (reason != null) { Console.WriteLine(" {0}: {1}", host, reason); } }), hosts, nthreads); Console.WriteLine(" {0}% healthy", Math.Floor((double)(hosts.Length - badones) * 100.0 / (double)hosts.Length)); } if (all) { Console.WriteLine("[DFS Health]"); int badones = 0; int totalchecked = 0; byte[] one = new byte[1]; //foreach (dfs.DfsFile df in dc.Files) MySpace.DataMining.Threading.ThreadTools<dfs.DfsFile>.Parallel( new Action<dfs.DfsFile>( delegate(dfs.DfsFile df) { string dfType = df.Type; if (0 == string.Compare(dfType, DfsFileTypes.NORMAL, StringComparison.OrdinalIgnoreCase) || 0 == string.Compare(dfType, DfsFileTypes.BINARY_RECT, StringComparison.OrdinalIgnoreCase) || 0 == string.Compare(dfType, DfsFileTypes.JOB, StringComparison.OrdinalIgnoreCase)) { totalchecked++; string msg = null; // Note: doesn't print Success. bool thisbad = false; MySpace.DataMining.Threading.ThreadTools<dfs.DfsFile.FileNode>.Parallel( new Action<dfs.DfsFile.FileNode>( delegate(dfs.DfsFile.FileNode fn) { if (thisbad) { // Only one error per DFS file. 
return; } string onhost = null; try { string[] fnHosts = fn.Host.Split(';'); { if (0 == string.Compare(dfType, DfsFileTypes.NORMAL, StringComparison.OrdinalIgnoreCase) || 0 == string.Compare(dfType, DfsFileTypes.BINARY_RECT, StringComparison.OrdinalIgnoreCase)) { if (fnHosts.Length < dc.Replication) { throw new Exception("DFS file '" + df.Name + "' only has " + fnHosts.Length.ToString() + " replicates (chunk '" + fn.Name + "')"); } } } { Dictionary<string, bool> hd = new Dictionary<string, bool>(new Surrogate.CaseInsensitiveEqualityComparer()); foreach (string chost in fnHosts) { onhost = chost; string xchost = IPAddressUtil.GetName(chost); if (hd.ContainsKey(xchost)) { throw new Exception("DFS file '" + df.Name + "' has invalid replicate data: multiple replicates on a single machine"); } hd.Add(xchost, true); } onhost = null; } { foreach (string chost in fnHosts) { onhost = chost; using (System.IO.FileStream fs = new System.IO.FileStream(Surrogate.NetworkPathForHost(chost) + @"\" + fn.Name, System.IO.FileMode.Open, System.IO.FileAccess.Read, System.IO.FileShare.Read, 1)) // bufferSize=1 { // Note: multiple threads writing to 'one' but I don't need it. 
fs.Read(one, 0, 1); } } onhost = null; } } catch (Exception e) { lock (df) { thisbad = true; msg = e.Message; if (null != onhost) { msg += " (host " + onhost + ")"; } } } }), df.Nodes, hosts.Length); if (thisbad) { badones++; } if (msg != null) { Console.WriteLine(" {0}: {1}", df.Name, msg); } } }), dc.Files, nthreads); int percent = 100; if (totalchecked > 0) { percent = (int)Math.Floor((double)(totalchecked - badones) * 100.0 / (double)totalchecked); } Console.WriteLine(" {0}% healthy", percent); foreach (KeyValuePair<string, Surrogate.HealthMethod> plugin in plugins) { Console.WriteLine("[{0}]", plugin.Key); Surrogate.HealthMethod hm = plugin.Value; badones = 0; MySpace.DataMining.Threading.ThreadTools<string>.Parallel( new Action<string>( delegate(string host) { string reason; if (Surrogate.SafeCallHealthMethod(hm, host, null, out reason)) { reason = null; } else { badones++; } if (reason != null) { Console.WriteLine(" {0}: {1}", host, reason); } }), hosts, nthreads); Console.WriteLine(" {0}% healthy", Math.Floor((double)(hosts.Length - badones) * 100.0 / (double)hosts.Length)); } Console.WriteLine("[Checking GetFiles()]"); int getfileserr = 0; MySpace.DataMining.Threading.ThreadTools<string>.Parallel( new Action<string>( delegate(string host) { string netpath = Surrogate.NetworkPathForHost(host); System.IO.DirectoryInfo dir = new System.IO.DirectoryInfo(netpath); try { System.IO.FileInfo[] files = dir.GetFiles("zd*.zd"); } catch (Exception e) { lock (hosts) { getfileserr++; Console.WriteLine("GetFiles() failed for host: {0}. 
Error: {1}", host, e.ToString()); } } }), hosts, nthreads); if (getfileserr > 0) { Console.WriteLine(" GetFiles() failed"); } else { Console.WriteLine(" GetFiles() succeeded"); } } if (verify) { Console.WriteLine("[Verify Drivers]"); string[] sl = new string[1]; bool vOK = true; foreach (string s in hosts) { sl[0] = s; if (!VerifyHostPermissions(sl)) { Console.Error.WriteLine("Ensure the Windows service is installed and running on '{0}'", s); vOK = false; } } if (vOK) { Console.WriteLine(" All machines are verified."); } } } break; case "repair": { dfs dc = LoadDfsConfig(); string[] slaves = dc.Slaves.SlaveList.Split(';'); bool unhealthy = false; for (int si = 0; si < slaves.Length; si++) { string reason; if (!Surrogate.IsHealthySlaveMachine(slaves[si], out reason)) { unhealthy = true; Console.WriteLine("{0} is unhealthy: {1}", slaves[si], reason); } } if (unhealthy) { Console.WriteLine("Cluster is unhealthy. Use " + appname + " removemachine to repair cluster."); } else { Console.WriteLine("Cluster is 100% healthy"); } } break; case "replicationphase": if (!ReplicationPhase(null, true, 0, null)) { Console.WriteLine("Nothing to replicate"); } break; case "replicationfix": { dfs dc = LoadDfsConfig(); Dictionary<string, bool> hd = new Dictionary<string, bool>(new Surrogate.CaseInsensitiveEqualityComparer()); StringBuilder sbHosts = new StringBuilder(); bool changedany = false; foreach (dfs.DfsFile df in dc.Files) { bool changedfile = false; foreach (dfs.DfsFile.FileNode fn in df.Nodes) { hd.Clear(); sbHosts.Length = 0; bool changednode = false; foreach (string chost in fn.Host.Split(';')) { string xchost = IPAddressUtil.GetName(chost); if (hd.ContainsKey(xchost)) { if (!changedfile) { Console.WriteLine(" Fixing {0}", df.Name); } changednode = true; changedfile = true; changedany = true; } else { if (0 != sbHosts.Length) { sbHosts.Append(';'); } sbHosts.Append(chost); hd.Add(xchost, true); } } if (changednode) { fn.Host = sbHosts.ToString(); } } } if (changedany) { 
UpdateDfsXml(dc); Console.WriteLine("Replication error fixed; to perform replication, issue command:"); Console.WriteLine(" {0} replicationphase", appname); } else { Console.WriteLine("No replication errors to fix"); } } break; case "replicationfactorupdate": case "replicationupdate": { if (args.Length <= 1) { Console.Error.WriteLine("Expected new replication factor"); SetFailure(); return; } else { EnterAdminCmd(); int newrf = int.Parse(args[1]); if (newrf < 1) { Console.Error.WriteLine("Replication factor must be at least 1"); SetFailure(); return; } else { int oldrf; dfs dc = LoadDfsConfig(); string[] slaves = dc.Slaves.SlaveList.Split(';'); oldrf = dc.Replication; if (newrf > slaves.Length) { Console.Error.WriteLine("Replication factor cannot be higher than the number of machines in the cluster ({0} is the maximum)", slaves.Length); SetFailure(); return; } if (newrf > oldrf) { checked { // Early disk space check... long freemin = long.MaxValue; long newspacezdcount = 0; long newspacezdsizes = 0; long newspacezsasizes = 0; //for (int si = 0; si < slaves.Length; si++) MySpace.DataMining.Threading.ThreadTools<string>.Parallel( new Action<string>( delegate(string slave) { //string slave = slaves[si]; string snetdir = Surrogate.NetworkPathForHost(slave); long x = (long)GetDiskFreeBytes(snetdir); lock (slaves) { if (x < freemin) { freemin = x; } } System.IO.DirectoryInfo netdi = new System.IO.DirectoryInfo(snetdir); { System.IO.FileInfo[] fis = (netdi).GetFiles("zd*.zd"); lock (slaves) { foreach (System.IO.FileInfo fi in fis) { newspacezdcount++; newspacezdsizes += fi.Length; } } } { System.IO.FileInfo[] fis = (netdi).GetFiles("zd*.zd.zsa"); lock (slaves) { foreach (System.IO.FileInfo fi in fis) { newspacezsasizes += fi.Length; } } } }), slaves); //freemin // If replication were 1, these are the file sizes on whole cluster. 
long singlereplicatezdsizetotal = newspacezdsizes / dc.Replication; long singlereplicatezsasizetotal = newspacezsasizes / dc.Replication; long freemintotal = freemin * slaves.Length; // Add a little padding (another average size data-node-chunk [64MB?]) long spacepaddingtotal = (newspacezdsizes / newspacezdcount) * slaves.Length; int morereplicates = newrf - oldrf; #if DEBUG if (morereplicates < 1) { throw new Exception("DEBUG: (morereplicates < 1)"); } #endif if ((morereplicates * (singlereplicatezdsizetotal + singlereplicatezsasizetotal + spacepaddingtotal)) > freemintotal) { Console.Error.WriteLine("Out of DFS disk space: there is not enough free space in DFS of the cluster to distribute the replicate data requested"); SetFailure(); return; } } } using (LockDfsMutex()) { dc = LoadDfsConfig(); if (dc.Replication != oldrf) { Console.Error.WriteLine("Replication factor already updated to {0}", dc.Replication); SetFailure(); return; } dc.Replication = newrf; UpdateDfsXml(dc); } Console.WriteLine("Replication factor set to {0}", newrf); if (newrf > oldrf) { if (!ReplicationPhase(null, true, 0, slaves)) { if (!QuietMode) { Console.Error.WriteLine("Nothing to replicate"); } } } else if (newrf < oldrf) { LowerReplicationCount(true); } } } } break; case "replicationfactorview": case "replicationview": { dfs dc = LoadDfsConfig(); Console.WriteLine("Replication factor is set to {0}", dc.Replication); } break; /* // Don't enable this due to admincmd. case "replicationfactor": case "replication": // ... 
break; * */ case "maxuserlogsupdate": { if (args.Length < 2) { Console.Error.WriteLine("Expected new maxUserLogs."); SetFailure(); return; } int max = 0; try { max = Int32.Parse(args[1]); } catch { Console.Error.Write("maxUserLogs must be an integer."); SetFailure(); return; } using (LockDfsMutex()) { dfs dc = LoadDfsConfig(); dc.MaxUserLogs = max; UpdateDfsXml(dc); } Console.WriteLine("MaxUserLogs set to {0}", max); } break; case "maxuserlogsview": { dfs dc = LoadDfsConfig(); Console.WriteLine("MaxUserLogs is set to {0}", dc.MaxUserLogs); } break; case "maxdglobalsupdate": { if (args.Length < 2) { Console.Error.WriteLine("Expected new maxDGlobals."); SetFailure(); return; } int max = 0; try { max = Int32.Parse(args[1]); } catch { Console.Error.Write("maxDGlobals must be an integer."); SetFailure(); return; } using (LockDfsMutex()) { dfs dc = LoadDfsConfig(); dc.MaxDGlobals = max; UpdateDfsXml(dc); } Console.WriteLine("MaxDGlobals set to {0}", max); } break; case "maxdglobalsview": { dfs dc = LoadDfsConfig(); Console.WriteLine("MaxDGlobals is set to {0}", dc.MaxDGlobals); } break; case "gen": case "generate": case "gendata": case "datagen": case "generatedata": case "genbin": case "bingen": case "genbinary": case "binarygen": case "generatebinary": case "asciigen": case "genascii": case "generateascii": case "wordgen": case "wordsgen": case "genword": case "genwords": case "generatewords": { #if STDOUT_LOG StdoutLog.Start(); #endif int iarg = 1; int iargseq = 1; // Index in forced sequence args; if int.MaxValue, an '=' was used. 
List<string> xpaths = null; string dfsoutput = null; long sizeoutput = long.MinValue; long rowsize = -1; string rowsep = Environment.NewLine; int writersCount = 0; GenerateRandom genrand = GenerateRandom.RANDOM; GenerateType gentype = GetGenerateType(act); for (; iarg < args.Length; iarg++) { string arg = args[iarg]; if (arg.StartsWith("/")) { if (null == xpaths) { xpaths = new List<string>(); } xpaths.Add(arg); } else { int ieq = arg.IndexOf('='); int argid = 0; if (-1 != ieq) { iargseq = int.MaxValue; string argname = arg.Substring(0, ieq); arg = arg.Substring(ieq + 1); switch (argname.ToLower()) { case "output-dfsfile": case "dfsfile": case "dfsoutput": case "output": argid = 1; break; case "size": case "outputsize": argid = 2; break; case "rowsize": case "row": argid = 3; break; case "writercount": case "writers": argid = 4; break; case "customrandom": //argid = 5; Console.Error.WriteLine("{0} is not valid", args[iarg]); SetFailure(); return; case "random": case "rand": if (0 == string.Compare(arg, "custom", true) || 0 == string.Compare(arg, "customrandom", true)) { genrand = GenerateRandom.DRANDOM; } else if (0 == string.Compare(arg, "Drandom", true) || 0 == string.Compare(arg, "Drand", true)) { genrand = GenerateRandom.DRANDOM; } else if (0 == string.Compare(arg, "Frandom", true) || 0 == string.Compare(arg, "Frand", true)) { genrand = GenerateRandom.FRANDOM; } else if (0 == string.Compare(arg, "random", true) || 0 == string.Compare(arg, "rand", true)) { genrand = GenerateRandom.RANDOM; } else if (0 == string.Compare(arg, "default", true)) { genrand = GenerateRandom.RANDOM; } else { Console.Error.WriteLine("Unknown random setting {0}", arg); SetFailure(); return; } continue; // Next arg... case "rows": case "rowcount": { if (long.MinValue == rowsize) { Console.Error.WriteLine("Row size must be specified before row count"); SetFailure(); return; } long nrows = long.Parse(arg); sizeoutput = (rowsize + rowsep.Length) * nrows; } continue; // Next arg... 
case "type": gentype = GetGenerateType(arg); continue; // Next arg... default: Console.Error.WriteLine("Unknown named argument '{0}' in {1}", argname, args[iarg]); SetFailure(); return; } } else { if (int.MaxValue == iargseq) { Console.Error.WriteLine("Argument error: <name>=<value> expected for argument {0}: {1}", iarg + 1, arg); SetFailure(); return; } argid = iargseq++; } switch (argid) { case 1: // output-dfsfile { dfsoutput = arg; } break; case 2: // outputsize sizeoutput = ParseLongCapacity(arg); break; case 3: // rowsize rowsize = ParseLongCapacity(arg); break; case 4: // writercount try { writersCount = Int32.Parse(arg); } catch (Exception e) { throw new FormatException("Invalid writers count: " + arg, e); } break; case 5: // customrandom if (0 == string.Compare(arg, "customrandom", true)) { genrand = GenerateRandom.DRANDOM; } else { Console.Error.WriteLine("Expected customrandom or end of arguments, not {0}", arg); SetFailure(); return; } break; default: throw new Exception(string.Format("DEBUG: Arguments parse failure: args[{0}]: {1}", iarg, args[iarg])); } } } if (string.IsNullOrEmpty(dfsoutput) || long.MinValue == sizeoutput) { #if DEBUG System.Diagnostics.Debugger.Launch(); #endif Console.Error.WriteLine("Arguments expected: [\"</xpath>=<value>\"] <output-dfsfile> <outputsize> [type=<bin|ascii|word>] [row=<size>] [writers=<count>] [rand=<DRand|FRand>]"); SetFailure(); return; } Generate(xpaths, dfsoutput, sizeoutput, rowsize, gentype, writersCount, genrand); } break; case "slaveinstalls": { // Note: doesn't include a non-participating surrogate! (it's not in SlaveList). 
dfs dc = LoadDfsConfig(); string[] hosts = dc.Slaves.SlaveList.Split(';'); bool healthcheck = (args.Length > 1 && "-healthy" == args[1]); if (healthcheck) { int threadcount = hosts.Length; if (threadcount > 15) { threadcount = 15; } int goodcount = 0; MySpace.DataMining.Threading.ThreadTools<string>.Parallel( new Action<string>( delegate(string host) { if (Surrogate.IsHealthySlaveMachine(host)) { //lock (hosts) { Console.WriteLine("{0} {1}", host, Surrogate.NetworkPathForHost(host)); //goodcount++; } System.Threading.Interlocked.Increment(ref goodcount); } }), hosts, threadcount); if (goodcount < dc.Replication) { Console.Error.WriteLine("Not enough healthy machines in cluster for replication factor of {0}", dc.Replication); SetFailure(); return; } } else { foreach (string host in hosts) { Console.WriteLine("{0} {1}", host, Surrogate.NetworkPathForHost(host)); } } } break; case "clusterconfigview": //clusterconfigview_cmd: if (args.Length <= 1) { Console.Error.WriteLine("Expected cluster config xpath"); return; } { string xpath = dfs.FixXPath(args[1]); System.Xml.XmlDocument xd = new System.Xml.XmlDocument(); using (LockDfsMutex()) { xd.Load(DFSXMLPATH); } System.Xml.XmlNodeList xnl = xd.SelectNodes(xpath); for (int j = 0; j < xnl.Count; j++) { //Console.WriteLine(" \"{0}\" = \"{1}\"", xnl[j].Name, xnl[j].InnerText); Console.WriteLine(xnl[j].InnerText); } } break; case "clusterconfigupdate": //clusterconfigupdate_cmd: if (args.Length <= 2) { Console.Error.WriteLine("Expected cluster config xpath and new value"); return; } { string xpath = dfs.FixXPath(args[1]); string value = args[2]; bool removing = "-" == value; int nvalues = 0; string status; using (LockDfsMutex()) { bool needsave = false; System.Xml.XmlDocument xd = new System.Xml.XmlDocument(); xd.Load(DFSXMLPATH); System.Xml.XmlNodeList xnl = xd.SelectNodes(xpath); for (int j = 0; j < xnl.Count; j++) { if (removing) { xnl[j].ParentNode.RemoveChild(xnl[j]); } else { xnl[j].InnerText = value; } nvalues++; } if 
(nvalues > 0) { needsave = true; if (1 == nvalues) { status = "Value updated"; } else { status = nvalues.ToString() + " values updated"; } } else { status = "0 values updated"; if (!removing) { // Attempt to add the value. int ilslash = xpath.LastIndexOf('/'); if (-1 != ilslash && ilslash < xpath.Length - 1) { string newnodename = xpath.Substring(ilslash + 1); xpath = xpath.Substring(0, ilslash); xnl = xd.SelectNodes(xpath); nvalues = 0; for (int j = 0; j < xnl.Count; j++) { System.Xml.XmlElement xe = xd.CreateElement(newnodename); xe.InnerText = value; xnl[j].AppendChild(xe); nvalues++; } if (nvalues > 0) { needsave = true; if (1 == nvalues) { status = "Value added"; } else { if (nvalues > 0) { status = nvalues.ToString() + " values added"; } } } } } } if (needsave) { using (System.IO.Stream stm = new System.IO.MemoryStream(Encoding.UTF8.GetBytes(xd.InnerXml))) { using (System.IO.StreamReader srconfig = new System.IO.StreamReader(stm)) { System.Xml.Serialization.XmlSerializer xs = new System.Xml.Serialization.XmlSerializer(typeof(dfs)); dfs dc = (dfs)xs.Deserialize(srconfig); if (null == dc.Files) { dc.Files = new List<dfs.DfsFile>(); } if (null == dc.slave) { dc.slave = new dfs.ConfigSlave(); } if (null == dc.slave.zblocks) { dc.slave.zblocks = new dfs.ConfigSlave.ConfigZBlocks(); } if (dc.Blocks.SortedTotalCount <= 0) { dc.Blocks.SortedTotalCount = dc.Slaves.SlaveList.Split(';', ',').Length * Surrogate.NumberOfProcessors; } UpdateDfsXml(dc); } } } } Console.WriteLine(status); } break; /* // Don't enable this due to admincmd. 
case "clusterconfig": if (args.Length == 2) { goto clusterconfigview_cmd; } else if (args.Length == 3) { goto clusterconfigupdate_cmd; } else { Console.WriteLine("Invalid number of arguments, expected: clusterconfig <xpath> [<value>]"); } break; * */ case "history": try { int iarg = 1; List<string> surrogates = null; bool showjids = false; int nlines = 10; for (; iarg < args.Length; iarg++) { if ("-j" == args[iarg]) { showjids = true; continue; } else if ("-j-" == args[iarg]) { showjids = false; continue; } else { int xnlines; if (int.TryParse(args[iarg], out xnlines)) { nlines = xnlines; continue; } } break; } if (args.Length > iarg) { surrogates = new List<string>(); string shosts = args[iarg++]; if (shosts.StartsWith("@")) { shosts = System.IO.File.ReadAllText(shosts.Substring(1)); foreach (string host in Surrogate.GetHostsFromFile(shosts.Substring(1))) { string surrogate = Surrogate.LocateMasterHost(Surrogate.NetworkPathForHost(host)); if (!surrogates.Contains(surrogate)) { surrogates.Add(surrogate); } } } else { foreach (string host in shosts.Split(';', ',')) { string surrogate = Surrogate.LocateMasterHost(Surrogate.NetworkPathForHost(host)); if (!surrogates.Contains(surrogate)) { surrogates.Add(surrogate); } } } } else { //surrogates.Add(System.Net.Dns.GetHostName()); } // Local function: Action<string, string> printhistory = new Action<string, string>( delegate(string host, string clustername) { string fp = Surrogate.NetworkPathForHost(host) + @"\execlog.txt"; string[] hlines; { const int iMAX_SECS_RETRY = 10; // Note: doesn't consider the time spent waiting on I/O. const int ITER_MS_WAIT = 100; // Milliseconds to wait each retry. 
int iters = iMAX_SECS_RETRY * 1000 / ITER_MS_WAIT; for (; ; ) { try { hlines = System.IO.File.ReadAllLines(fp); break; } catch { if (--iters < 0) { throw; } System.Threading.Thread.Sleep(ITER_MS_WAIT); continue; } } } int mynlines = nlines; if (mynlines > hlines.Length) { mynlines = hlines.Length; } Console.WriteLine("History of last {0} actions on {1} cluster:", mynlines, clustername); for (int i = hlines.Length - mynlines; i < hlines.Length; i++) { string ln = hlines[i]; bool oldspace = true; ln = ln.Replace(" -@log ", " "); try { int ijid = ln.IndexOf("] @JID#"); if (-1 != ijid) { ijid += 2; string b4j = ln.Substring(0, ijid); int isp = ln.IndexOf(' ', ijid); if (-1 != isp) { string afj = ln.Substring(isp + 1); if (showjids) { // Same format as ps. ln = ln.Substring(ijid + 5, isp - (ijid + 5) + 1) + b4j + afj; oldspace = false; } else { ln = b4j + afj; } } } } catch (Exception eajaj) { #if DEBUG System.Diagnostics.Debugger.Launch(); #endif } Console.WriteLine("{0}{1}", (oldspace ? " " : " "), ln.Replace("drule", "SYSTEM")); } }); if (null == surrogates) { string host = System.Net.Dns.GetHostName(); printhistory(host, "current"); } else if (surrogates.Count < 1) { Console.Error.WriteLine("Error: no hosts"); SetFailure(); return; } else //if (surrogates.Count > 1) { for (int hi = 0; hi < surrogates.Count; hi++) { if (hi > 0) { Console.WriteLine("--------------------------------"); } printhistory(surrogates[hi], surrogates[hi]); } } } catch (Exception ehh) { LogOutputToFile(ehh.ToString()); Console.Error.WriteLine("No history"); } break; case "exechistory": { dfs dc = LoadDfsConfig(); string[] execHistory = GetExecHistory(dc.LogExecHistory); if (execHistory != null && execHistory.Length > 0) { Console.WriteLine("History of last {0} exec actions:", execHistory.Length); for (int i = 0; i < execHistory.Length; i++) { int ast = execHistory[i].IndexOf('*'); Console.WriteLine(" {0} [ " + appname + " execview {1} ]", execHistory[i].Substring(ast + 1), 
execHistory[i].Substring(0, ast)); } } else { Console.Error.WriteLine("No exec history"); } } break; case "listinstalldir": { dfs dc = LoadDfsConfig(); string[] hosts = dc.Slaves.SlaveList.Split(';'); if (hosts.Length > 0) { foreach (string host in hosts) { Console.WriteLine(MySpace.DataMining.DistributedObjects5.DistObject.GetNetworkPath(host)); } } } break; case "psstatus": SafePS(true); break; case "ps": SafePS(); Console.WriteLine("Information:"); if (!dfs.DfsConfigExists(DFSXMLPATH)) { //Console.Error.WriteLine("DFS not setup; use: {0} dfs format", appname); //SetFailure(); return; } { checked { long totalmem = 0; long nodeminmem = long.MaxValue; long totalfreemem = 0; long nodeminfreemem = long.MaxValue; dfs dc = LoadDfsConfig(); string[] hosts = dc.Slaves.SlaveList.Split(';'); if (hosts.Length > 0) { //foreach (string host in hosts) MySpace.DataMining.Threading.ThreadTools<string>.Parallel( new Action<string>( delegate(string host) { string meminfo = GetMemoryStatusForHost(host); if (null == meminfo) { Console.Error.WriteLine("Unable to get memory information for host '{0}'", host); } else { string[] memlines = meminfo.Split('\n'); lock (hosts) { foreach (string _ml in memlines) { string ml = _ml.Trim(); if (ml.StartsWith("TotalPhys: ")) { long x = long.Parse(ml.Substring(ml.IndexOf(' ') + 1)); totalmem += x; if (x < nodeminmem) { nodeminmem = x; } } else if (ml.StartsWith("AvailPhys: ")) { long x = long.Parse(ml.Substring(ml.IndexOf(' ') + 1)); totalfreemem += x; if (x < nodeminfreemem) { nodeminfreemem = x; } } } } } } ), hosts, hosts.Length); if (nodeminmem == long.MaxValue) { nodeminmem = 0; } if (nodeminfreemem == long.MaxValue) { nodeminfreemem = 0; } Console.WriteLine(" {0} Total Memory\r\n {1} machine avg\r\n {2} machine min\r\n {3} process avg", GetFriendlyByteSize(totalmem), GetFriendlyByteSize(totalmem / (long)hosts.Length), GetFriendlyByteSize(nodeminmem), GetFriendlyByteSize((long)((double)nodeminmem / ((double)dc.Blocks.TotalCount / 
(double)hosts.Length)))); Console.WriteLine(" {0} Free Memory\r\n {1} machine avg\r\n {2} machine min\r\n {3} process avg", GetFriendlyByteSize(totalfreemem), GetFriendlyByteSize(totalfreemem / (long)hosts.Length), GetFriendlyByteSize(nodeminfreemem), GetFriendlyByteSize((long)((double)nodeminfreemem / ((double)dc.Blocks.TotalCount / (double)hosts.Length)))); Console.WriteLine(" {0} Processes\r\n {1} Machines", dc.Blocks.TotalCount, hosts.Length); } } } break; case "ver": case "version": { string buildtype; #if DEBUG buildtype = "debug"; #else buildtype = "release"; #endif //Console.WriteLine("AELight \"{0}\" {1} build {2}", appname, buildtype, GetBuildInfo()); Console.WriteLine("Version: " + (GetBuildDateTime().ToString()).Replace(":", ".").Replace(" ", ".").Replace("/", ".").Replace("AM", "A").Replace("PM", "P")); } break; case "#mem": { string memhost = "localhost"; // Note: probably local to master... if (args.Length > 1) { memhost = args[1]; } string meminfo = GetMemoryStatusForHost(memhost); if (null == meminfo) { Console.Error.WriteLine("Unable to get memory information for host '{0}'", memhost); SetFailure(); return; } Console.WriteLine(meminfo); } break; case "md5": #if STDOUT_LOG StdoutLog.Start(); #endif if (args.Length > 1) { GenerateHash("MD5", args[1]); } else { Console.Error.WriteLine("DFS file name expected"); SetFailure(); return; } break; case "sum": case "checksum": #if STDOUT_LOG StdoutLog.Start(); #endif if (args.Length > 1) { GenerateHash("Sum", args[1]); } else { Console.Error.WriteLine("DFS file name expected"); SetFailure(); return; } break; case "checksummt": #if STDOUT_LOG StdoutLog.Start(); #endif if (args.Length > 1) { CheckSummt("Sum_mt", args[1]); } else { Console.Error.WriteLine("DFS file name expected"); SetFailure(); return; } break; case "sum2": case "checksum2": #if STDOUT_LOG StdoutLog.Start(); #endif if (args.Length > 1) { GenerateHash("Sum2", args[1]); } else { Console.Error.WriteLine("DFS file name expected"); SetFailure(); 
return; } break; case "sortedmt": #if STDOUT_LOG StdoutLog.Start(); #endif if (args.Length > 1) { CheckSortedmt(args[1]); } else { Console.Error.WriteLine("DFS file name expected"); SetFailure(); return; } break; case "sorted": case "checksorted": case "issorted": #if STDOUT_LOG StdoutLog.Start(); #endif if (args.Length > 1) { CheckSorted(args[1]); } else { Console.Error.WriteLine("DFS file name expected"); SetFailure(); return; } break; case "dfs": if (args.Length < 2) { Console.Error.WriteLine("DFS command expected"); ShowUsage(); } else { string dfsarg = args[1]; Dfs(dfsarg, SubArray(args, 2)); } break; case "combine": case "info": case "information": case "head": case "get": case "getbinary": case "put": case "putbinary": case "copy": case "cp": case "del": case "delete": case "rm": case "rename": case "ren": case "move": case "mv": case "getjobs": case "putjobs": case "ls": case "dir": case "invalidate": case "delmt": case "delst": case "\u0040format": case "format": case "countparts": case "filesize": case "bulkput": case "bulkget": case "swap": case "fput": case "fget": case "partinfo": case "delchunk": { string dfsarg = args[0]; Dfs(dfsarg, SubArray(args, 1)); } break; case "shuffle": case "copyto": #if STDOUT_LOG StdoutLog.Start(); #endif { string dfsarg = args[0]; Dfs(dfsarg, SubArray(args, 1)); } break; case "edit": case "editor": Console.Error.WriteLine("Error: must call " + appname + " to use jobs editor"); SetFailure(); break; case "dfsbind": #if DEBUG //System.Threading.Thread.Sleep(1000 * 8); #endif if (!dfs.DfsConfigExists(DFSXMLPATH)) { Console.Error.WriteLine("DFS not setup; use: {0} format", appname); SetFailure(); return; } if (args.Length < 5) { Console.Error.WriteLine("Invalid arguments"); ShowUsage(); } else { // Note: dfsbind expects files to be in expected format with needed samples, etc. // For internal use. 
string newactualfilehost = args[1]; string newactualfilename = args[2]; string newprettyfilename = args[3]; string filetype = args[4]; bool autoskip4bytes = false; try { autoskip4bytes = (args.Length > 5) && "-h4" == args[5]; } catch { } /*if (0 != string.Compare(DfsFileTypes.JOB, filetype)) { Console.Error.WriteLine("dfsbind not supported for DFS files of type " + filetype); SetFailure(); return; }*/ { long flen = 0; dfs.DfsFile df = new dfs.DfsFile(); df.Nodes = new List<dfs.DfsFile.FileNode>(1); if (newactualfilename.Length > 0 && "/" != newactualfilename) { string ActualFile = Surrogate.NetworkPathForHost(newactualfilehost) + @"\" + newactualfilename; System.IO.FileInfo finfo = new System.IO.FileInfo(ActualFile); //if (finfo.Exists) { flen = finfo.Length; if (autoskip4bytes) { flen -= 4; if (flen < 0) { flen = 0; } } dfs.DfsFile.FileNode fnode = new dfs.DfsFile.FileNode(); fnode.Host = newactualfilehost; fnode.Position = 0; fnode.Length = flen; fnode.Name = newactualfilename; df.Nodes.Add(fnode); } } df.Name = ".$" + newprettyfilename + ".$replicating-" + Guid.NewGuid().ToString(); df.Size = flen; df.Type = filetype; using (LockDfsMutex()) { dfs dc = LoadDfsConfig(); // Reload in case of intermediate change. if (null != DfsFindAny(dc, df.Name)) { Console.Error.WriteLine("Output file already exists:" + df.Name); SetFailure(); return; } dc.Files.Add(df); UpdateDfsXml(dc); // ! } ReplicationPhase(df.Name, false, 0, null); // Note: doesn't use unhealthy-slaves exclusion list! using (LockDfsMutex()) { dfs dc = LoadDfsConfig(); // Reload in case of intermediate change. dfs.DfsFile dfu = dc.FindAny(df.Name); if (null != dfu) { if (null != DfsFindAny(dc, newprettyfilename)) { Console.Error.WriteLine("Output file already exists"); SetFailure(); return; } dfu.Name = newprettyfilename; UpdateDfsXml(dc); } } } } break; case "@log": // No action, it's just logged. 
break; case "generatefasttests" : if (!dfs.DfsConfigExists(DFSXMLPATH)) { Console.Error.WriteLine("DFS not setup; use: {0} format", appname); SetFailure(); return; } try { FastRegressionTest.GenFastRegressionTests(); } catch (Exception e) { LogOutput(e.ToString()); } break; case "examples": case "example": if (!dfs.DfsConfigExists(DFSXMLPATH)) { Console.Error.WriteLine("DFS not setup; use: {0} format", appname); SetFailure(); return; } try { Examples.Generate(); } catch (Exception e) { LogOutput(e.ToString()); } break; case "stresstests": if (!dfs.DfsConfigExists(DFSXMLPATH)) { Console.Error.WriteLine("DFS not setup; use: {0} format", appname); SetFailure(); return; } string whattest = "sort"; if (args.Length > 1) { whattest = args[1].ToLower(); } try { switch (whattest) { case "sort": StressTests.SortTests.GenerateTests(); break; case "valuesize": StressTests.ValueSizeTests.GenerateTests(); break; case "criticalsection": StressTests.CriticalSectionTests.GenerateTests(); break; default: Console.Error.WriteLine("Invalid test names. 
Try: " + appname + " stresstests <sort | valuesize>"); SetFailure(); return; } } catch (Exception e) { LogOutput(e.ToString()); } break; case "replacemachine": EnterAdminCmd(); if (!dfs.DfsConfigExists(DFSXMLPATH)) { Console.Error.WriteLine("DFS not setup; use: {0} format", appname); SetFailure(); return; } { string oldhost = null, newhost = null; bool DontTouchRMHost = false; bool RMForce = false; for (int iarg = 1; iarg < args.Length; iarg++) { if (args[iarg][0] == '-') { switch (args[iarg]) { case "-s": DontTouchRMHost = true; break; case "-f": RMForce = true; break; default: Console.Error.WriteLine("Warning: Unknown switch: {0}", args[iarg]); break; } } else { if (null == oldhost) { oldhost = args[iarg]; } else if (null == newhost) { newhost = args[iarg]; } else { Console.Error.WriteLine("Too many hosts specified: {0}, {1} and {2}", oldhost, newhost, args[iarg]); SetFailure(); return; } } } if (args.Length < 3) { Console.Error.WriteLine("replacemachine error: expected <oldmachine> <newmachine>"); SetFailure(); return; } if (0 == string.Compare(IPAddressUtil.GetName(oldhost), IPAddressUtil.GetName(Surrogate.MasterHost), StringComparison.OrdinalIgnoreCase)) { Console.Error.WriteLine("Error: machine '{0}' being replaced is the surrogate" + "; to replace surrogate, must use command" + " replacesurrogate [-f] [-nostop] [-s] <oldhost> <metabackup-path> <target-dspace-path> [<new-metabackup-path>] [-metabackup-now]", oldhost); SetFailure(); return; } if (!VerifyHostPermissions(new string[] { newhost })) { Shell(@"sc \\" + newhost + " start DistributedObjects"); System.Threading.Thread.Sleep(1000 * 3); // Time to start. 
if (!VerifyHostPermissions(new string[] { newhost })) { Console.Error.WriteLine("Unable to ReplaceMachine: ensure the Windows service is installed and running on '{0}'", newhost); SetFailure(); return; } } ReplaceMachine(oldhost, newhost, DontTouchRMHost, RMForce); Console.WriteLine("Done"); } break; case "replacesurrogate": // [-f] [-nostop] [-s] [-nometabackup] ... EnterAdminCmd(); { bool stop = true; string oldhost = null, newhost = null; string metabackuplocation = null; string newmetabackuppath = ""; string targetdspacepath = null; bool DontTouchRMHost = false; bool callmetabackupnow = true; bool RMForce = false; int narg = 0; for (int iarg = 1; iarg < args.Length; iarg++) { if (args[iarg][0] == '-') { switch (args[iarg]) { case "-s": DontTouchRMHost = true; break; case "-f": RMForce = true; break; case "-nostop": stop = false; break; case "-nometabackup": callmetabackupnow = false; break; default: Console.Error.WriteLine("Unknown switch: {0}", args[iarg]); return; } } else { // <oldhost> <metabackup-path> <target-dspace-path> [<new-metabackup-path>] switch (narg++) { case 0: oldhost = args[iarg]; break; case 1: metabackuplocation = args[iarg]; break; case 2: targetdspacepath = args[iarg]; break; case 3: // Optional. 
newmetabackuppath = args[iarg]; break; default: Console.Error.WriteLine("Too many arguments specified: {0}", args[iarg]); SetFailure(); return; } } } if (narg <= 2) { Console.Error.WriteLine("replacesurrogate error: too few arguments"); SetFailure(); return; } if (0 != string.Compare(IPAddressUtil.GetName(oldhost), IPAddressUtil.GetName(Surrogate.MasterHost), StringComparison.OrdinalIgnoreCase)) { Console.Error.WriteLine("Error: machine '{0}' being replaced is not the existing surrogate" + "; the existing surrogate is '{1}'", oldhost, Surrogate.MasterHost); SetFailure(); return; } if (string.IsNullOrEmpty(newmetabackuppath)) { if (callmetabackupnow) { Console.Error.WriteLine("Must specify metabackup location in order to metabackup now"); SetFailure(); return; } Console.WriteLine("Note: new metabackup location not specified"); } else { if (!System.IO.Directory.Exists(newmetabackuppath)) { System.IO.Directory.CreateDirectory(newmetabackuppath); } if (!callmetabackupnow) { Console.WriteLine("Note: -nometabackup was specified; {0} metabackup -backup-now" + " will need to be manually issued after this command", appname); } } string metabackupdfsxmlpath; if (System.IO.Directory.Exists(metabackuplocation)) { string dfsbackupxml = metabackuplocation + @"\dfs-backup.xml"; // Favor this one. 
if (System.IO.File.Exists(dfsbackupxml)) { metabackupdfsxmlpath = dfsbackupxml; } else { string[] xmlfiles = System.IO.Directory.GetFiles(metabackuplocation, "*.xml"); if (xmlfiles.Length > 1) { Console.Error.WriteLine("Error: Too many xml files found in metabackup location; remove all but one and try again: {0}", metabackuplocation); Console.WriteLine("Must be exactly one *.xml file in metabackup location"); SetFailure(); return; } else if (xmlfiles.Length < 1) { Console.Error.WriteLine("Error: {0} not found in metabackup location: {1}", dfs.DFSXMLNAME, metabackuplocation); SetFailure(); return; } else //if (xmlfiles.Length == 1) { metabackupdfsxmlpath = xmlfiles[0]; } } } else if (System.IO.File.Exists(metabackuplocation)) { Console.WriteLine("Error: must speicfy directory of metabackup, not file: {0}", metabackuplocation); SetFailure(); return; } else { Console.WriteLine("Error: metabackup directory not found: {0}", metabackuplocation); SetFailure(); return; } if (targetdspacepath.StartsWith(@"\\")) { int ixh = targetdspacepath.IndexOf('\\', 2); if (-1 == ixh) { Console.Error.WriteLine("Error: problem parsing network from path: {0}", targetdspacepath); SetFailure(); return; } newhost = targetdspacepath.Substring(2, ixh - 2); } else { //newhost = System.Net.Dns.GetHostName(); Console.WriteLine("Error: network path required for target {0} install directory for surrogate: {1}", appname, targetdspacepath); SetFailure(); return; } if (!VerifyHostPermissions(new string[] { newhost })) { Shell(@"sc \\" + newhost + " start DistributedObjects"); System.Threading.Thread.Sleep(1000 * 3); // Time to start. 
if (!VerifyHostPermissions(new string[] { newhost })) { Console.Error.WriteLine("Unable to ReplaceMachine: ensure the Windows service is installed and running on '{0}'", newhost); SetFailure(); return; } } Console.WriteLine("Loading metabackup metadata...", metabackupdfsxmlpath); dfs mbdc; try { mbdc = dfs.ReadDfsConfig_unlocked(metabackupdfsxmlpath); } catch (Exception e) { Console.Error.WriteLine("Unable to read metadata from '{0}': {1}", metabackupdfsxmlpath, e.Message); SetFailure(); return; } string[] slaves = mbdc.Slaves.SlaveList.Split(';'); int threadcount = slaves.Length; if (threadcount > 15) { threadcount = 15; } Console.WriteLine("Accessing target " + appname + " path {0} ...", targetdspacepath); if (!System.IO.File.Exists(targetdspacepath + @"\aelight.exe")) { Console.Error.WriteLine("Problem accessing target " + appname + " path '{0}': {1}", targetdspacepath, appname + " is not installed at this location"); SetFailure(); return; } try { // Run a little test to verify... string fp = targetdspacepath + "\\replacesurrogate." + Surrogate.SafeTextPath(System.Net.Dns.GetHostName()) + "." + Guid.NewGuid(); System.IO.File.WriteAllText(fp, "[" + DateTime.Now.ToString() + "] replacesurrogate command issued from " + System.Net.Dns.GetHostName() + " {7BCD3A7C-3FA6-466f-84CB-51D70BB2B686}" + Environment.NewLine); if (-1 == System.IO.File.ReadAllText(fp).IndexOf("{7BCD3A7C-3FA6-466f-84CB-51D70BB2B686}")) { System.IO.File.Delete(fp); throw new System.IO.IOException("Read verification error {7BCD3A7C-3FA6-466f-84CB-51D70BB2B686}"); } System.IO.File.Delete(fp); } catch (Exception e) { Console.Error.WriteLine("Problem accessing target " + appname + " path '{0}': {1}", targetdspacepath, e.Message); SetFailure(); return; } // So stopping services doesn't kill this instance. 
try { MakeInvincible(); } catch (Exception e) { Console.WriteLine("Warning: {0}", e.Message); LogOutputToFile("MakeInvincible warning during ReplaceSurrogate: " + e.ToString()); } _CleanPidFile_unlocked(); if (stop) { Console.WriteLine(" Stopping services..."); System.Threading.Thread.Sleep(1000 * 1); // Give a sec. MySpace.DataMining.Threading.ThreadTools<string>.Parallel( new Action<string>( delegate(string host) { try { if (0 == string.Compare(IPAddressUtil.GetName(oldhost), IPAddressUtil.GetName(host), StringComparison.OrdinalIgnoreCase)) { return; } Shell("sc \\\\" + host + " stop DistributedObjects"); } catch { } }), slaves, threadcount); Shell("sc \\\\" + newhost + " stop DistributedObjects"); System.Threading.Thread.Sleep(1000 * 3); // Give a bit of extra time to shutdown. } try { ReplaceMachineMetadataMemory(mbdc, oldhost, newhost, DontTouchRMHost, RMForce, true); { Console.WriteLine("Restoring surrogate..."); Surrogate.SetNewMasterHost(newhost); Surrogate.SetNewMetaLocation(targetdspacepath); Console.WriteLine(" Restoring jobs files..."); foreach (System.IO.FileInfo zdfi in (new System.IO.DirectoryInfo(metabackuplocation)).GetFiles("*.zd")) { System.IO.File.Copy(zdfi.FullName, targetdspacepath + @"\" + zdfi.Name, true); } try { string schedulerbackuplocation = newmetabackuppath; if (string.IsNullOrEmpty(schedulerbackuplocation)) { schedulerbackuplocation = null; } if (MySpace.DataMining.DistributedObjects.Scheduler.BackupRestore( metabackuplocation, targetdspacepath, schedulerbackuplocation)) { //Console.WriteLine("Restored scheduled and queued tasks"); } else { //Console.WriteLine("No scheduled or queued tasks to restore"); } } catch (System.IO.FileNotFoundException e) { Console.WriteLine("Warning: unable to restore scheduled and queued tasks, perhaps it was never backed up from before this feature."); Console.WriteLine("Message: {0}", e.Message); } mbdc.MetaBackup = newmetabackuppath; if (!string.IsNullOrEmpty(newmetabackuppath)) { 
EnsureMetaBackupLocation(mbdc); // Important! Only do this AFTER restoring everything from metabackup location! // Because the user might want to re-use the same directory. foreach (string fn in System.IO.Directory.GetFiles(mbdc.GetMetaBackupLocation())) { System.IO.File.Delete(fn); } } // Save mbdc to targetdspacepath Console.WriteLine(" Restoring metadata..."); try { System.IO.File.Delete(targetdspacepath + @"\dfs.xml"); } catch { } try { System.IO.File.Delete(targetdspacepath + @"\slave.dat"); } catch { } { // Updating slave.dat if found... // If no slave.dat, it's probably a participating surrogate. MySpace.DataMining.Threading.ThreadTools<string>.Parallel( new Action<string>( delegate(string slave) { if (0 == string.Compare(IPAddressUtil.GetName(oldhost), IPAddressUtil.GetName(slave), StringComparison.OrdinalIgnoreCase)) { return; } try { System.IO.File.Delete(Surrogate.NetworkPathForHost(slave) + @"\dfs.xml"); } catch { } try { string sdfp = Surrogate.NetworkPathForHost(slave) + @"\slave.dat"; if (System.IO.File.Exists(sdfp)) { string[] sd = System.IO.File.ReadAllLines(sdfp); string sdfpnew = sdfp + ".new"; using (System.IO.StreamWriter sw = System.IO.File.CreateText(sdfpnew)) { bool fm = false; for (int i = 0; i < sd.Length; i++) { string line = sd[i]; if (line.StartsWith("master=", StringComparison.OrdinalIgnoreCase)) { line = "master=" + newhost; fm = true; } sw.WriteLine(line); } if (!fm) { throw new Exception("Invalid slave.dat on " + slave + " - master=host entry not found"); } } System.IO.File.Delete(sdfp); System.IO.File.Move(sdfpnew, sdfp); } else { // If it doesn't exist, write out a new one, but not if it is surrogate. 
if (0 != string.Compare(IPAddressUtil.GetName(newhost), IPAddressUtil.GetName(slave), StringComparison.OrdinalIgnoreCase)) { System.IO.File.WriteAllText(sdfp, "master=" + newhost + Environment.NewLine); } } } catch (Exception e) { lock (slaves) { Console.Error.WriteLine("WARNING: Error on machine {0}: {1}", slave, e.Message); } } }), slaves, threadcount); } { // Fix old surrogate jobs-files references. foreach (dfs.DfsFile df in mbdc.Files) { if (0 == string.Compare(df.Type, DfsFileTypes.JOB, StringComparison.OrdinalIgnoreCase)) { foreach (dfs.DfsFile.FileNode fn in df.Nodes) { fn.Host = newhost; } } } } } // Write new dfs.xml... only if ReplaceMachineMetadataMemory succeeded. UpdateDfsXml(mbdc, targetdspacepath + @"\" + dfs.DFSXMLNAME, mbdc.GetMetaBackupLocation()); } finally { // Restart the services even if an exception happened, so you can run a command again. if (stop) { Console.WriteLine(" Starting services..."); System.Threading.Thread.Sleep(1000 * 1); // Give a sec. MySpace.DataMining.Threading.ThreadTools<string>.Parallel( new Action<string>( delegate(string host) { try { if (0 == string.Compare(IPAddressUtil.GetName(oldhost), IPAddressUtil.GetName(host), StringComparison.OrdinalIgnoreCase)) { return; } Shell("sc \\\\" + host + " start DistributedObjects"); } catch { } }), slaves, threadcount); Shell("sc \\\\" + newhost + " start DistributedObjects"); System.Threading.Thread.Sleep(1000 * 1); // Give a sec to startup. } } Console.WriteLine("Done"); LeaveAdminLock(); if (callmetabackupnow) { Console.WriteLine(); Console.WriteLine("Calling '{0} metabackup -backup-now' on new surrogate...", appname); System.Threading.Thread.Sleep(1000 * 60); // Give another sec to startup. 
Console.WriteLine(Shell(appname + " @=" + newhost + " metabackup -backup-now").Trim()); } if (!callmetabackupnow) { if (!string.IsNullOrEmpty(newmetabackuppath)) { Console.WriteLine("Type the following to backup the current meta-data:"); Console.WriteLine(" {0} metabackup -backup-now", appname); } else { Console.WriteLine("Use the metabackup command to re-enable metabackups"); } } } break; case "addnode": case "addmachine": EnterAdminCmd(); if (!dfs.DfsConfigExists(DFSXMLPATH)) { Console.Error.WriteLine("DFS not setup; use: {0} format", appname); SetFailure(); return; } { if (args.Length < 2) { Console.Error.WriteLine("addmachine error: new machine host expected"); SetFailure(); return; } string newhost = args[1]; AddMachine(newhost); } break; case "delnode": case "deletenode": case "removenode": case "remnode": case "delmachine": case "deletemachine": case "removemachine": case "remmachine": EnterAdminCmd(); if (!dfs.DfsConfigExists(DFSXMLPATH)) { Console.Error.WriteLine("DFS not setup; use: {0} format", appname); SetFailure(); return; } { if (args.Length < 2) { Console.Error.WriteLine("removemachine error: old host expected"); SetFailure(); return; } string oldhost = args[1]; RemoveMachine(oldhost); } break; case "who": { List<string> hosts = new List<string>(); { if (args.Length > 1) { string shosts = args[1]; if (shosts.StartsWith("@")) { Surrogate.GetHostsFromFileAppend(shosts.Substring(1), hosts); } else { hosts.AddRange(shosts.Split(';', ',')); } } else { dfs dc = LoadDfsConfig(); hosts.AddRange(dc.Slaves.SlaveList.Split(';')); { // Always include self host if current cluster. 
if (null == GetSelfHost(hosts)) { hosts.Add(System.Net.Dns.GetHostName()); } } } } List<string> results = new List<string>(); MySpace.DataMining.Threading.ThreadTools<string>.Parallel( new Action<string>( delegate(string host) { string[] rr; try { // Don't "suppress errors" but drop them for "No User exists for *" rr = Shell("query user \"/server:" + host + "\"").Split('\n'); } catch { //continue; return; } int usernameoffset = -1; int usernameend = -1; int lotimeoffset = -1; int lotimeend = -1; int idtimeoffset = -1; if (rr.Length > 0) { usernameoffset = rr[0].IndexOf(" USERNAME"); if (usernameoffset > -1) { usernameend = rr[0].IndexOf(" ", usernameoffset + 1); } lotimeoffset = rr[0].IndexOf(" LOGON TIME"); if (lotimeoffset > -1) { lotimeend = rr[0].IndexOf(" ", lotimeoffset + 1); } idtimeoffset = rr[0].IndexOf(" IDLE TIME"); } if (usernameoffset > -1 && usernameend > usernameoffset && lotimeoffset > usernameoffset && idtimeoffset > -1) { usernameoffset++; usernameend--; lotimeoffset++; if (lotimeend > -1) { lotimeend--; } idtimeoffset = idtimeoffset + 10; for (int j = 1; j < rr.Length; j++) // First line is a header, so skip it. { string line = rr[j].Trim('\r'); if (line.Length <= lotimeoffset) { continue; } string username = line.Substring(usernameoffset, usernameend - usernameoffset).Trim(); string logontime; if (lotimeend > -1) { logontime = line.Substring(lotimeoffset, lotimeend - lotimeoffset).Trim(); } else { logontime = line.Substring(lotimeoffset).Trim(); } string idletime = ""; { string subline = rr[j].Substring(0, idtimeoffset).Trim(); for (int li = subline.Length - 1; li >= 0; li--) { if (subline[li] != ' ') { idletime = subline[li] + idletime; } else { break; } } } string nodename = host; lock (results) { results.Add(username + " is logged on " + nodename + " since " + logontime + ". 
Idle for " + idletime + " min"); } } } } ), hosts, hosts.Count); if (results.Count > 0) { results.Sort(); for (int ir = 0; ir < results.Count; ir++) { Console.WriteLine(" {0}", results[ir]); } } else { Console.WriteLine("No users logged on"); } } break; case "killall": Console.Error.WriteLine("Cannot killall from here"); break; case "adminlock": /* if (args.Length >= 2 && args[1] == "-f") { EnterAdminCmd(true, true); // BypassJobs=true, PersistLock=true } else { EnterAdminCmd(false, true); // BypassJobs=false, PersistLock=true } * */ EnterAdminLock(); Console.WriteLine("Locked"); break; case "adminunlock": case "unlock": /* if (null == PersistAdminUser) { Console.WriteLine("No lock is in effect"); } else { if (IsLockAllow) { _LeaveAdminCmd(); Console.WriteLine("Lock released"); } else { // Won't reach here because I'm locked out from getting here anyway. //Console.Error.WriteLine("Permission denied: cluster locked by another user"); } } * */ if (!LeaveAdminLock()) { Console.WriteLine("No lock is in effect"); } else { Console.WriteLine("Lock released"); } break; case "memcacheinstall": { if (args.Length < 4) { Console.Error.WriteLine("Usage: {0} memcacheinstall <@hosts|host[,host...]> <username> <password>", appname); SetFailure(); return; } string[] hosts = null; if (args[1][0] == '@') { hosts = Surrogate.GetHostsFromFile(args[1].Substring(1)); } else { hosts = args[1].Split(';', ','); } if (hosts.Length == 0) { Console.Error.WriteLine("No host provided."); SetFailure(); return; } string username = args[2]; string password = args[3]; string localhost = System.Net.Dns.GetHostName(); string srcfp = NetworkPathForHost(System.Net.Dns.GetHostName()) + @"\MemCachePin.exe"; int nthreads = hosts.Length; if (nthreads > 10) { nthreads = 10; } MySpace.DataMining.Threading.ThreadTools<string>.Parallel( new Action<string>( delegate(string host) { string sout = Shell(@"sc \\" + host + " query MemCachePin", true); if (sout.IndexOf("SERVICE_NAME: MemCachePin") > -1) { lock (hosts) 
{ Console.WriteLine("Service found on {0}", host); } sout = Shell(@"sc \\" + host + " stop MemCachePin", true); lock (hosts) { Console.WriteLine("Stopping service on {0}", host); Console.WriteLine(sout); } sout = Shell(@"sc \\" + host + " delete MemCachePin", true); lock (hosts) { Console.WriteLine("Deleting service on {0}", host); Console.WriteLine(sout); } } if (string.Compare(host, localhost, true) != 0) { Console.WriteLine("Copying file to host {0}", host); string destfp = NetworkPathForHost(host) + @"\MemCachePin.exe"; try { // Remove read-only. System.IO.FileAttributes destattribs = System.IO.File.GetAttributes(destfp); if ((destattribs & System.IO.FileAttributes.ReadOnly) == System.IO.FileAttributes.ReadOnly) { System.IO.File.SetAttributes(destfp, destattribs & ~System.IO.FileAttributes.ReadOnly); } } catch { } try { System.IO.File.Copy(srcfp, destfp, true); } catch (Exception e) { lock (hosts) { Console.WriteLine("Error copying file to host " + host + ". Source: " + srcfp + "; destination: " + destfp + ". 
Error: " + e.ToString()); return; } } } sout = Shell(@"sc \\" + host + " create MemCachePin binPath= \"" + AELight_Dir + @"\" + "MemCachePin.exe\" start= auto obj= \"" + username + "\" DisplayName= MemCachePin password= \"" + password + "\""); lock (hosts) { Console.WriteLine("Deploying to {0}", host); Console.WriteLine(sout); } sout = Shell(@"sc \\" + host + " start MemCachePin", false); lock (hosts) { Console.WriteLine("Starting service on {0}", host); Console.WriteLine(sout); } }), hosts, nthreads); } break; case "deploy": case "deploymt": case "deployst": { #if DEBUG //System.Threading.Thread.Sleep(1000 * 8); #endif EnterAdminCmd(); if (!dfs.DfsConfigExists(DFSXMLPATH)) { Console.Error.WriteLine("DFS not setup; use: {0} format", appname); SetFailure(); return; } if (isdspace) { Console.Error.WriteLine("Cannot deploy from {0}", appname); SetFailure(); return; } bool withMemCachePin = false; string arghosts = null; for (int iarg = 1; iarg < args.Length; iarg++) { if ("-f" == args[iarg]) { // Ignore. } else if (0 == string.Compare("-MemCachePin", args[iarg], true) || 0 == string.Compare("-MemCache", args[iarg], true)) { withMemCachePin = true; } else { arghosts = args[iarg]; } } string[] hosts; bool thishostcheck = false; if (null != arghosts) { if (arghosts.StartsWith("@")) { hosts = Surrogate.GetHostsFromFile(arghosts.Substring(1)); } else { hosts = arghosts.Split(';', ','); } thishostcheck = true; } else { dfs dc = LoadDfsConfig(); hosts = dc.Slaves.SlaveList.Split(';'); } if (withMemCachePin) { Console.WriteLine("Including MemCachePin service"); } { string curdir = System.Environment.CurrentDirectory; try { System.Environment.CurrentDirectory = AELight_Dir; //Console.WriteLine(Shell("cleanup.bat", true)); bool singlethreaded = act == "deployst"; int threadcount = singlethreaded ? 
1 : hosts.Length; if (threadcount > 15) { threadcount = 15; } List<string> copyfiles = new List<string>(); { string sr1 = Surrogate.WildcardRegexString("temp_*-????-????-????-*.dll"); System.Text.RegularExpressions.Regex r1 = new System.Text.RegularExpressions.Regex(sr1, System.Text.RegularExpressions.RegexOptions.IgnoreCase | System.Text.RegularExpressions.RegexOptions.Singleline); string sr2 = Surrogate.WildcardRegexString("dbg_*~*_????????-????-????-????-????????????.*"); System.Text.RegularExpressions.Regex r2 = new System.Text.RegularExpressions.Regex(sr2, System.Text.RegularExpressions.RegexOptions.IgnoreCase | System.Text.RegularExpressions.RegexOptions.Singleline); foreach (System.IO.FileInfo fi in (new System.IO.DirectoryInfo(".")).GetFiles("*.exe")) { string fn = fi.Name; if (r2.IsMatch(fn)) { continue; } if(!withMemCachePin && 0 == string.Compare(fn, "MemCachePin.exe", true)) { continue; } copyfiles.Add(fn); } foreach (System.IO.FileInfo fi in (new System.IO.DirectoryInfo(".")).GetFiles("*.dll")) { string fn = fi.Name; if (!r1.IsMatch(fn) && !r2.IsMatch(fn)) { copyfiles.Add(fn); } } foreach (System.IO.FileInfo fi in (new System.IO.DirectoryInfo(".")).GetFiles("haarcascade_*.xml")) { string fn = fi.Name; copyfiles.Add(fn); } copyfiles.Add("cleanup.bat"); copyfiles.Add("serviceconfig.xml"); copyfiles.Add("MySpace.DataMining.DistributedObjects.DistributedObjectsSlave.exe.config"); copyfiles.Add("licenses_and_attributions.txt"); } int nrealdeploy = 0; //for (int hi = 0; hi < hosts.Length; hi++) MySpace.DataMining.Threading.ThreadTools<string>.Parallel( new Action<string>( delegate(string host) { //string host = hosts[hi]; string netpath = NetworkPathForHost(host); try { // Do this before the host check, // so it includes a nonparticipating surrogate. 
System.IO.File.Delete(netpath + @"\execq.dat"); } catch { } if (thishostcheck) { if (0 == string.Compare(IPAddressUtil.GetName(System.Net.Dns.GetHostName()), IPAddressUtil.GetName(host), StringComparison.OrdinalIgnoreCase)) { lock (hosts) { Console.WriteLine("Not deploying to {0} (host check)", host); } return; } } else { if (dfs.DfsConfigExists(netpath + @"\" + dfs.DFSXMLNAME)) { lock (hosts) { Console.WriteLine("Not deploying to {0} (metadata check)", host); } return; } } { string sout = Shell(@"sc \\" + host + " stop DistributedObjects", true); lock (hosts) { //Console.WriteLine("Deploying to {0}:", host); Console.WriteLine("Deploying to {0}", netpath); Console.WriteLine(sout); // Suppress error. } if (0 != System.IO.Directory.GetFiles(".", "*.pid").Length) { System.Threading.Thread.Sleep(1000 * 8); } if (withMemCachePin) { System.Threading.Thread.Sleep(1000); Shell(@"sc \\" + host + " stop MemCachePin", true); } System.Threading.Thread.Sleep(1000 * 2); for (int copyretry = 0; ; copyretry++) { try { for (int i = 0; i < copyfiles.Count; i++) { string destfp = netpath + @"\" + copyfiles[i]; try { // Remove read-only. System.IO.FileAttributes destattribs = System.IO.File.GetAttributes(destfp); if ((destattribs & System.IO.FileAttributes.ReadOnly) == System.IO.FileAttributes.ReadOnly) { System.IO.File.SetAttributes(destfp, destattribs & ~System.IO.FileAttributes.ReadOnly); } } catch { } try { System.IO.File.Copy(copyfiles[i], destfp, true); } catch { if (copyretry < 10) { throw; } lock (hosts) { Console.WriteLine("(errcopy:{0}:{1})", host, copyfiles[i]); ConsoleFlush(); } } } break; } catch { /*if (copyretry >= 20) { throw; }*/ lock (hosts) { Console.Write('.'); ConsoleFlush(); } //System.Threading.Thread.Sleep(1000 * 2); } } //Console.WriteLine("{0} files copied", copyfiles.Count); string uout = Shell(@"sc \\" + host + " start DistributedObjects", false); lock (hosts) { Console.WriteLine("Starting {0}:", host); Console.WriteLine(uout); // Throws on error. 
nrealdeploy++; } if (withMemCachePin) { System.Threading.Thread.Sleep(1000); Shell(@"sc \\" + host + " start MemCachePin", true); } } } ), hosts, threadcount); Console.WriteLine("Deployed to {0} hosts", nrealdeploy); } finally { System.Environment.CurrentDirectory = curdir; } } } break; case "harddrivespeedtest": { ulong filesize = 64 * 1024 * 1024; int iarg = 1; if (args.Length > iarg) { try { filesize = (ulong)ParseLongCapacity(args[iarg]); iarg++; } catch { } } if (filesize < 1024 * 1024) { Console.Error.WriteLine("Filesize must be at least 1MB", appname); SetFailure(); return; } string[] hosts; if (args.Length > iarg) { string shosts = args[iarg]; if (shosts.StartsWith("@")) { hosts = Surrogate.GetHostsFromFile(shosts.Substring(1)); } else { hosts = shosts.Split(';', ','); } } else { dfs dc = LoadDfsConfig(); hosts = dc.Slaves.SlaveList.Split(';'); } Dictionary<string, double> reads = new Dictionary<string, double>(); Dictionary<string, double> writes = new Dictionary<string, double>(); for (int i = 0; i < hosts.Length; i++) { string host = hosts[i]; Console.WriteLine("Testing: {0}", host); double write = 0; double read = 0; Surrogate.HardDriveSpeedTest(host, filesize, ref write, ref read); reads.Add(host, read); writes.Add(host, write); } List<KeyValuePair<string, double>> sReads = new List<KeyValuePair<string, double>>(reads); sReads.Sort( delegate(KeyValuePair<string, double> firstPair, KeyValuePair<string, double> nextPair) { return firstPair.Value.CompareTo(nextPair.Value); } ); List<KeyValuePair<string, double>> sWrites = new List<KeyValuePair<string, double>>(writes); sWrites.Sort( delegate(KeyValuePair<string, double> firstPair, KeyValuePair<string, double> nextPair) { return firstPair.Value.CompareTo(nextPair.Value); } ); Console.WriteLine(); Console.WriteLine("Read Speed"); foreach (KeyValuePair<string, double> p in sReads) { Console.WriteLine("{0}: {1} MB/s", p.Key, p.Value); } Console.WriteLine(); Console.WriteLine("Write Speed"); foreach 
(KeyValuePair<string, double> p in sWrites) { Console.WriteLine("{0}: {1} MB/s", p.Key, p.Value); } } break; case "cputemp": { dfs dc = LoadDfsConfig(); string[] hosts; if (args.Length > 1) { string shosts = args[1]; if (shosts.StartsWith("@")) { hosts = Surrogate.GetHostsFromFile(shosts.Substring(1)); } else { hosts = shosts.Split(';', ','); } } else { hosts = dc.Slaves.SlaveList.Split(';'); } Dictionary<string, double> temps = new Dictionary<string, double>(); int nThreads = hosts.Length / 15; if (nThreads * 15 < hosts.Length) { nThreads++; } if (nThreads > 15) { nThreads = 15; } MySpace.DataMining.Threading.ThreadTools<string>.Parallel( new Action<string>( delegate(string slave) { lock (temps) { Console.WriteLine("Getting temp: {0}", slave); } double temp = Surrogate.GetCPUTemperature(slave); lock (temps) { Console.WriteLine("Temp returned from {0}: {1}", slave, temp); temps.Add(slave, temp); } } ), hosts, nThreads); //Sort List<KeyValuePair<string, double>> sTemps = new List<KeyValuePair<string, double>>(temps); sTemps.Sort( delegate(KeyValuePair<string, double> firstPair, KeyValuePair<string, double> nextPair) { return firstPair.Value.CompareTo(nextPair.Value); } ); Console.WriteLine(); Console.WriteLine("Sorted temperature:"); double total = 0; foreach (KeyValuePair<string, double> p in sTemps) { Console.WriteLine("{0}: {1} F", p.Key, p.Value); total += p.Value; } double min = sTemps[0].Value; double max = sTemps[sTemps.Count - 1].Value; Console.WriteLine(); Console.WriteLine("Min Temp: {0} F", min); Console.WriteLine("Max Temp: {0} F", max); Console.WriteLine("Avg Temp: {0} F", total / (double)sTemps.Count); } break; case "packetsniff": SafePacketSniff(args); break; case "networkspeedtest": { ulong filesize = 64 * 1024 * 1024; int iarg = 1; if (args.Length > iarg) { try { filesize = (ulong)ParseLongCapacity(args[iarg]); iarg++; } catch { } } if (filesize < 1024 * 1024) { Console.Error.WriteLine("Filesize must be at least 1MB", appname); SetFailure(); return; 
} string[] hosts; if (args.Length > iarg) { string shosts = args[iarg]; if (shosts.StartsWith("@")) { hosts = Surrogate.GetHostsFromFile(shosts.Substring(1)); } else { hosts = shosts.Split(';', ','); } } else { dfs dc = LoadDfsConfig(); hosts = dc.Slaves.SlaveList.Split(';'); } List<List<double>> download = new List<List<double>>(); List<List<double>> upload = new List<List<double>>(); Surrogate.NetworkSpeedTest(hosts, filesize, download, upload); Dictionary<string, double> avgDownload = new Dictionary<string, double>(); Dictionary<string, double> avgUpload = new Dictionary<string, double>(); for (int i = 0; i < hosts.Length; i++) { string host = hosts[i]; //Get avg download for this host. double avg = 0; for (int j = 0; j < download[i].Count; j++) { avg += download[i][j]; } avg = avg / (double)download[i].Count; avgDownload.Add(host, avg); //Get avg upload for this host. avg = 0; for (int j = 0; j < upload[i].Count; j++) { avg += upload[i][j]; } avg = avg / (double)upload[i].Count; avgUpload.Add(host, avg); } //Sort List<KeyValuePair<string, double>> sDown = new List<KeyValuePair<string, double>>(avgDownload); sDown.Sort( delegate(KeyValuePair<string, double> firstPair, KeyValuePair<string, double> nextPair) { return firstPair.Value.CompareTo(nextPair.Value); } ); List<KeyValuePair<string, double>> sUp = new List<KeyValuePair<string, double>>(avgUpload); sUp.Sort( delegate(KeyValuePair<string, double> firstPair, KeyValuePair<string, double> nextPair) { return firstPair.Value.CompareTo(nextPair.Value); } ); Console.WriteLine("Download speed"); foreach (KeyValuePair<string, double> p in sDown) { Console.WriteLine("{0}: {1} MB/s", p.Key, p.Value); } Console.WriteLine(); Console.WriteLine("Upload speed"); foreach (KeyValuePair<string, double> p in sUp) { Console.WriteLine("{0}: {1} MB/s", p.Key, p.Value); } } break; case "perfmon": { Perfmon.SafeGetCounters(SubArray(args, 1)); } break; case "genhostname": case "genhostnames": { if (args.Length < 4) { 
Console.Error.WriteLine("Error: genhostnames command needs arguments: <pattern> <startNum> <endNum> [<delimiter>]"); return; } string pattern = args[1].Trim(); int startNum = 0; int endNum = 0; try { startNum = Int32.Parse(args[2]); endNum = Int32.Parse(args[3]); } catch { Console.Error.WriteLine("Error: startNum / endNum are not valid integers."); return; } string del = ";"; if (args.Length > 4) { del = args[4].Replace(@"\n", Environment.NewLine); } int pad = 0; int shp = pattern.IndexOf('#'); if (shp > -1) { pad = pattern.LastIndexOf('#') - shp + 1; pattern = pattern.Substring(0, shp); } for (int i = startNum; i <= endNum; i++) { string part = i.ToString().PadLeft(pad, '0'); Console.Write(pattern + part); if (i != endNum) { Console.Write(del); } } } break; case "scrapeemptynames": { List<dfs.DfsFile> goods = new List<dfs.DfsFile>(); int badcount = 0; using (LockDfsMutex()) { dfs dc = LoadDfsConfig(); for (int i = 0; i < dc.Files.Count; i++) { dfs.DfsFile file = dc.Files[i]; if (file.Type != DfsFileTypes.NORMAL || file.Name.Trim().Length > 0) { goods.Add(file); } else { badcount++; } } dc.Files = goods; UpdateDfsXml(dc); } if (badcount > 0) { Console.WriteLine("{0} empty names are scraped.", badcount); } } break; default: if ('-' == args[0][0]) { Console.Error.WriteLine("Error: Unrecognized action: {0}", args[0]); } else { Console.Error.WriteLine("Error: Action expected"); } ShowUsage(); break; } }