/// <summary>
/// Regression-test scaffold: moves the existing DFS.xml aside, formats a fresh DFS across the
/// local host plus the configured slaves, prints a "[PASSED]" line, and finally always deletes
/// the test DFS contents and moves the original DFS.xml back. The test-logic section is empty.
/// </summary>
/// <param name="args">
/// args[1] must be the path of an existing DFS.xml. args[0] is not read by this method.
/// The whole array is echoed in the "[PASSED]" line.
/// </param>
/// <exception cref="Exception">args[1] is missing or does not name an existing file.</exception>
static void NewDFS(string[] args)
{
    bool haveConfig = args.Length > 1 && System.IO.File.Exists(args[1]);
    if (!haveConfig)
    {
        throw new Exception("Expected path to DFS.xml");
    }

    string configPath = args[1];
    string backupPath = configPath + "$" + Guid.NewGuid().ToString();

    // DirectoryName is the full path of the directory that holds DFS.xml.
    string metaDir = new System.IO.FileInfo(configPath).DirectoryName;
    Surrogate.SetNewMetaLocation(metaDir);

    dfs previousConfig = dfs.ReadDfsConfig_unlocked(configPath);
    string localHost = System.Net.Dns.GetHostName();

    // Machine list: local host first, then every configured slave whose resolved name
    // differs (case-insensitively) from the local host's resolved name.
    string[] configuredSlaves = previousConfig.Slaves.SlaveList.Split(';');
    List<string> machines = new List<string>(configuredSlaves.Length + 1);
    machines.Add(localHost);
    for (int i = 0; i < configuredSlaves.Length; i++)
    {
        string candidate = configuredSlaves[i];
        bool isLocalHost = string.Equals(
            IPAddressUtil.GetName(candidate),
            IPAddressUtil.GetName(localHost),
            StringComparison.OrdinalIgnoreCase);
        if (!isLocalHost)
        {
            machines.Add(candidate);
        }
    }
    string[] allMachines = machines.ToArray();

    Console.WriteLine("Backing up DFS.xml to: {0} ...", backupPath);
    try
    {
        System.IO.File.Delete(backupPath);
    }
    catch
    {
        // Best effort: a stale backup with this name is not expected to exist.
    }
    System.IO.File.Move(configPath, backupPath);

    try
    {
        Console.WriteLine("Formatting DFS for test...");
        Exec.Shell("Qizmt @format Machines=" + string.Join(",", allMachines));

        // Test logic: (none in this method)

        Console.WriteLine("[PASSED] - " + string.Join(" ", args));
    }
    finally
    {
        Console.WriteLine("Restoring DFS.xml backup...");

        // The try/finally only wraps the temporary DFS, so wiping it here is safe.
        try
        {
            Exec.Shell("Qizmt del *");
        }
        catch
        {
        }

        try
        {
            // Remove the temporary DFS.xml; the original is moved back in its place below.
            System.IO.File.Delete(configPath);
        }
        catch
        {
        }

        System.IO.File.Move(backupPath, configPath);
    }
}
/// <summary>
/// Regression test for raising the replication factor on a freshly formatted DFS.
/// Backs up DFS.xml, formats a new DFS, fills it with generated data (optionally extra bytes and
/// a cache-producing job), runs "Qizmt replicationupdate 2", then verifies health, replication
/// and that "Qizmt ls" prints the same number of lines as before. The original DFS.xml is
/// always restored in the finally block.
/// </summary>
/// <param name="args">
/// args[0]: test name; if it contains "withcache" (case-insensitive) the Cacher job is also run.
/// args[1]: path of an existing DFS.xml.
/// args[2] (optional): bytes-to-add, parsed with long.Parse; must be at least 1048576.
/// args[3] (optional): number of files for the 30% slice; must be in 0..bytes-to-add/20.
/// </param>
/// <exception cref="Exception">Invalid arguments, or any verification step failed.</exception>
static void EnableReplication(string[] args)
{
    if (args.Length <= 1 || !System.IO.File.Exists(args[1]))
    {
        throw new Exception("Expected path to DFS.xml");
    }

    bool withcache = -1 != args[0].IndexOf("withcache", StringComparison.OrdinalIgnoreCase);
    string dfsxmlpath = args[1];
    string dfsxmlpathbackup = dfsxmlpath + "$" + Guid.NewGuid().ToString();

    long bytes_to_add = 0;
    if (args.Length > 2)
    {
        // To-do: ParseCapacity.
        bytes_to_add = long.Parse(args[2]);
        if (bytes_to_add < 0)
        {
            throw new Exception("Invalid bytes-to-add (" + bytes_to_add.ToString() + " bytes)");
        }
        if (bytes_to_add < 1048576)
        {
            throw new Exception("bytes-to-add must be at least 1 MB");
        }
    }

    int num_files = 0;
    if (args.Length > 3)
    {
        num_files = int.Parse(args[3]);
        if (num_files < 0 || num_files > bytes_to_add / 20)
        {
            throw new Exception("Invalid #files");
        }
    }

    string masterdir;
    {
        System.IO.FileInfo fi = new System.IO.FileInfo(dfsxmlpath);
        masterdir = fi.DirectoryName; // Directory's full path.
    }
    Surrogate.SetNewMetaLocation(masterdir);

    dfs olddfs = dfs.ReadDfsConfig_unlocked(dfsxmlpath);

    // Machine list: local host first, then every slave that does not resolve to the local host.
    string masterhost = System.Net.Dns.GetHostName();
    string[] allmachines;
    {
        string[] sl = olddfs.Slaves.SlaveList.Split(';');
        List<string> aml = new List<string>(sl.Length + 1);
        aml.Add(masterhost);
        foreach (string slave in sl)
        {
            if (0 != string.Compare(IPAddressUtil.GetName(slave), IPAddressUtil.GetName(masterhost), StringComparison.OrdinalIgnoreCase))
            {
                aml.Add(slave);
            }
        }
        allmachines = aml.ToArray();
    }

    Console.WriteLine("Backing up DFS.xml to: {0} ...", dfsxmlpathbackup);
    try
    {
        System.IO.File.Delete(dfsxmlpathbackup);
    }
    catch
    {
    }
    System.IO.File.Move(dfsxmlpath, dfsxmlpathbackup);

    try
    {
        {
            Console.WriteLine("Formatting DFS for test...");
            Exec.Shell("Qizmt @format Machines=" + string.Join(",", allmachines));
        }

        {
            // Test logic:

            Console.WriteLine("Adding some files to DFS...");
            Console.Write(" ");
            Exec.Shell("Qizmt bingen 1MB 1MB 50");
            Console.Write("10%");
            Exec.Shell("Qizmt examples");
            Console.Write("..15%");
            Exec.Shell("Qizmt wordgen 10MB 10MB 100"); // Note: also used by Cacher.
            Console.Write("..50%");
            Exec.Shell("Qizmt asciigen 50MB 50MB 500");
            Console.Write("..100%");
            Console.WriteLine();

            if (bytes_to_add > 0)
            {
                Console.WriteLine("Adding {0} bytes as requested (bytes-to-add)...", bytes_to_add);
                long bta10 = bytes_to_add / 10;
                Console.Write(" ");
                Exec.Shell("Qizmt gen bta10-" + Guid.NewGuid().ToString() + " " + bta10.ToString());
                Console.Write("10%");
                Exec.Shell("Qizmt gen bta20-" + Guid.NewGuid().ToString() + " " + (bta10 * 2).ToString());
                Console.Write("..30%");
                {
                    long totsz = (bta10 * 3);
                    if (num_files > 1)
                    {
                        // Split the 30% slice into num_files files generated in parallel.
                        long onesz = totsz / num_files;
                        MySpace.DataMining.Threading.ThreadTools.Parallel(
                            new Action<int>(
                            delegate(int inf)
                            {
                                Exec.Shell("Qizmt gen bta30." + inf.ToString() + "-" + Guid.NewGuid().ToString() + " " + onesz.ToString());
                            }), num_files, 15);
                    }
                    else
                    {
                        Exec.Shell("Qizmt gen bta30-" + Guid.NewGuid().ToString() + " " + totsz.ToString());
                    }
                    // Fix: report 60% for both branches; it used to be printed only in the
                    // single-file branch, leaving a gap in the progress trail.
                    Console.Write("..60%");
                }
                Exec.Shell("Qizmt gen bta40-" + Guid.NewGuid().ToString() + " " + (bta10 * 4).ToString());
                Console.Write("..100%");
                Console.WriteLine();
            }

            if (withcache)
            {
                Console.WriteLine("Generating cache files...");
                string cachertempdir = @"\\" + System.Net.Dns.GetHostName() + @"\C$\temp\qizmt\regression_test_Cacher-" + Guid.NewGuid().ToString();
                if (!System.IO.Directory.Exists(cachertempdir))
                {
                    System.IO.Directory.CreateDirectory(cachertempdir);
                }
                string cacherfp = cachertempdir + @"\Cacher";
                // Backticks stand in for double quotes and are replaced before writing.
                System.IO.File.WriteAllText(cacherfp, (@"<?xml version=`1.0` encoding=`utf-8`?>
<SourceCode>
  <Jobs>
    <Job Name=`Cacher`>
      <Delta>
        <Name>Cacher_cache</Name>
        <DFSInput>dfs://10MB</DFSInput>
      </Delta>
      <IOSettings>
        <JobType>mapreduce</JobType>
        <KeyLength>100</KeyLength>
        <DFSInput></DFSInput>
        <DFSOutput>dfs://Cacher_output</DFSOutput>
        <KeyMajor>8</KeyMajor>
        <OutputMethod>grouped</OutputMethod>
      </IOSettings>
      <MapReduce>
        <Map>
          <![CDATA[
          public virtual void Map(ByteSlice line, MapOutput output)
          {
              output.Add(line, ByteSlice.Prepare());
          }
        ]]>
        </Map>
        <Reduce>
          <![CDATA[
          public override void Reduce(ByteSlice key, RandomAccessEntries values, RandomAccessOutput output)
          {
              for(int i = 0; i < values.Length; i++)
              {
                  output.Add(key);
              }
          }
        ]]>
        </Reduce>
      </MapReduce>
    </Job>
  </Jobs>
</SourceCode>
").Replace('`', '"'));
                Exec.Shell("Qizmt importdir " + cachertempdir);
                try
                {
                    System.IO.File.Delete(cacherfp);
                    System.IO.Directory.Delete(cachertempdir);
                }
                catch
                {
                    // Best effort: leftover temp files do not affect the test.
                }
                Exec.Shell("Qizmt exec Cacher"); // Creates cache file Cacher_cache
                Exec.Shell("Qizmt del Cacher_output");
            }

            Console.WriteLine("Ensure the cluster is perfectly healthy...");
            EnsurePerfectQizmtHealtha();

            string ls_output = Exec.Shell("Qizmt ls");
            int ls_output_linecount = ls_output.Split('\n').Length;
            Console.WriteLine("*** ls output before replication:");
            Console.WriteLine(ls_output);

            Console.WriteLine("Updating Replication Factor...");
            const int replicationfactor = 2;
            Console.WriteLine(Exec.Shell("Qizmt replicationupdate " + replicationfactor.ToString()));

            Console.WriteLine("Ensure the cluster is perfectly healthy...");
            EnsurePerfectQizmtHealtha();

            if (withcache)
            {
                Console.WriteLine("Validate cache files...");
                Exec.Shell("Qizmt exec Cacher"); // Uses existing cache file Cacher_cache
                Exec.Shell("Qizmt del Cacher_output");
            }

            Console.WriteLine("Ensure data is replicated...");
            EnsureReplication(dfsxmlpath, replicationfactor);

            {
                // Not comparing contents because of the free disk space line.
                string new_ls_output = Exec.Shell("Qizmt ls");
                Console.WriteLine("*** ls output after replication:");
                Console.WriteLine(new_ls_output);
                int new_ls_output_linecount = new_ls_output.Split('\n').Length;
                if (ls_output_linecount != new_ls_output_linecount)
                {
                    throw new Exception("Cluster does not contain the same files as before replication");
                }
            }
        }

        Console.WriteLine("[PASSED] - " + string.Join(" ", args));
    }
    finally
    {
        Console.WriteLine("Restoring DFS.xml backup...");
        // Note: these are safe; the try/finally only wraps the new dfs.
        try
        {
            Exec.Shell("Qizmt del *");
        }
        catch
        {
        }
        try
        {
            // Delete temp dfs.xml, it's being replaced with the good one.
            System.IO.File.Delete(dfsxmlpath);
        }
        catch
        {
        }
        System.IO.File.Move(dfsxmlpathbackup, dfsxmlpath);
    }
}
/// <summary>
/// Regression test comparing a cached mapreduce job run with OutputMethod "grouped" against the
/// same job run with "sorted". Each variant is executed twice: once with inputs a.txt and b1.txt,
/// then again after renaming b2.txt_ to b2.txt so the DFSInput wildcard "b?.txt" also matches it.
/// The "Sum2" checksum of the second output must be identical for both variants.
/// Works on the existing DFS (DFS.xml is not replaced) and cleans up its own files.
/// </summary>
/// <param name="args">args[1] must be the path of an existing DFS.xml; args[0] is not read here.</param>
/// <exception cref="Exception">Invalid arguments, or the two checksums differ.</exception>
static void SortedCache(string[] args)
{
    if (args.Length <= 1 || !System.IO.File.Exists(args[1]))
    {
        throw new Exception("Expected path to DFS.xml");
    }

    string dfsxmlpath = args[1];

    string masterdir;
    {
        System.IO.FileInfo fi = new System.IO.FileInfo(dfsxmlpath);
        masterdir = fi.DirectoryName; // Directory's full path.
    }
    Surrogate.SetNewMetaLocation(masterdir);

    dfs dc = dfs.ReadDfsConfig_unlocked(dfsxmlpath);

    // NOTE(review): allmachines is not used further in this method. It is kept because
    // building it calls IPAddressUtil.GetName, whose effects are not visible here;
    // confirm before removing.
    string masterhost = System.Net.Dns.GetHostName();
    string[] allmachines;
    {
        string[] sl = dc.Slaves.SlaveList.Split(';');
        List<string> aml = new List<string>(sl.Length + 1);
        aml.Add(masterhost);
        foreach (string slave in sl)
        {
            if (0 != string.Compare(IPAddressUtil.GetName(slave), IPAddressUtil.GetName(masterhost), StringComparison.OrdinalIgnoreCase))
            {
                aml.Add(slave);
            }
        }
        allmachines = aml.ToArray();
    }

    {
        // Test logic:

        // Fixed prefix shared by every DFS file this test creates (also used by SortedCacheCleanup).
        const string P = "{D7D3A6FE-8472-4320-9144-486E436D4542}";
        // Placeholder inside the job template that is replaced by the output method name.
        const string OutputMethodPlaceholder = "{9235036E-4A47-4ee5-985F-F19D2F2DE85C}";

        string fguid = "{" + Guid.NewGuid().ToString() + "}";
        string jobfn = "regression_test_SortedCache-" + Guid.NewGuid().ToString();

        SortedCacheCleanup(jobfn); // Cleanup previous run.
        try
        {
            {
                Console.WriteLine("Generating data and jobs...");
                string exectempdir = @"\\" + System.Net.Dns.GetHostName() + @"\C$\temp\qizmt\regression_test_SortedCache" + fguid;
                if (!System.IO.Directory.Exists(exectempdir))
                {
                    System.IO.Directory.CreateDirectory(exectempdir);
                }
                string execfp = exectempdir + @"\" + jobfn;
                // Backticks stand in for double quotes and are replaced below.
                string ECODE = (@"<SourceCode>
  <Jobs>
    <Job Name=`CS` Custodian=`Chris Miller` Email=``>
      <Delta>
        <Name>{D7D3A6FE-8472-4320-9144-486E436D4542}CS_cache</Name>
        <DFSInput>{D7D3A6FE-8472-4320-9144-486E436D4542}a.txt;dfs://{D7D3A6FE-8472-4320-9144-486E436D4542}b?.txt</DFSInput>
      </Delta>
      <IOSettings>
        <JobType>mapreduce</JobType>
        <KeyLength>1</KeyLength>
        <DFSInput></DFSInput>
        <DFSOutput>dfs://{D7D3A6FE-8472-4320-9144-486E436D4542}CS_Output.txt</DFSOutput>
        <OutputMethod>{9235036E-4A47-4ee5-985F-F19D2F2DE85C}</OutputMethod>
      </IOSettings>
      <MapReduce>
        <Map>
          <![CDATA[
          List<byte> foo = new List<byte>();
          List<byte> bar = new List<byte>();
          public virtual void Map(ByteSlice line, MapOutput output)
          {
              foo.Clear();
              bar.Clear();
              foo.Add((byte)('A' + line[0] % 16)); // A-F only.
              bar.Add(line[1]);
              output.Add(ByteSlice.Prepare(foo), ByteSlice.Prepare(bar));
          }
        ]]>
        </Map>
        <Reduce>
          <![CDATA[
          public override void Reduce(ByteSlice key, ByteSliceList values, ReduceOutput output)
          {
              long result = 0;
              while(values.MoveNext())
              {
                  ByteSlice v = values.Current;
                  result += v[0];
              }
              mstring ms = mstring.Prepare();
              ms.AppendM((char)key[0]);
              ms.AppendM(result);
              output.Add(ms);
          }
        ]]>
        </Reduce>
      </MapReduce>
    </Job>
  </Jobs>
</SourceCode>
").Replace('`', '"');
                System.IO.File.WriteAllText(execfp + ".grouped", ECODE.Replace(OutputMethodPlaceholder, "grouped"));
                System.IO.File.WriteAllText(execfp + ".sorted", ECODE.Replace(OutputMethodPlaceholder, "sorted"));
                Exec.Shell("Qizmt importdir " + exectempdir);
                try
                {
                    System.IO.File.Delete(execfp + ".grouped");
                    System.IO.File.Delete(execfp + ".sorted");
                    System.IO.Directory.Delete(exectempdir);
                }
                catch
                {
                    // Best effort: leftover temp files do not affect the test.
                }
                Exec.Shell("Qizmt asciigen " + P + "a.txt 16KB 2B");
                Exec.Shell("Qizmt asciigen " + P + "b1.txt 8KB 2B");
                // Trailing underscore keeps b2 out of the "b?.txt" wildcard until it is renamed.
                Exec.Shell("Qizmt asciigen " + P + "b2.txt_ 8KB 2B");
            }

            // The grouped and sorted runs issue the same command sequence; only the job suffix differs.
            string[] outputMethods = new string[] { "grouped", "sorted" };
            string[] checksums = new string[outputMethods.Length];
            for (int i = 0; i < outputMethods.Length; i++)
            {
                string method = outputMethods[i];
                Console.WriteLine("Running " + method + " job...");
                Exec.Shell("Qizmt del " + P + "CS_cache");
                Exec.Shell("Qizmt del " + P + "CS_Output.txt");
                Exec.Shell("Qizmt exec " + jobfn + "." + method);
                Exec.Shell("Qizmt rename " + P + "b2.txt_ " + P + "b2.txt");
                Exec.Shell("Qizmt del " + P + "CS_Output.txt");
                Exec.Shell("Qizmt exec " + jobfn + "." + method);
                // Hide b2 again so the next variant starts from the same state.
                Exec.Shell("Qizmt rename " + P + "b2.txt " + P + "b2.txt_");
                checksums[i] = DfsSum("Sum2", P + "CS_Output.txt");
                Console.WriteLine(" checksum2 = {0}", checksums[i]);
            }

            if (checksums[0] != checksums[1])
            {
                throw new Exception("Checksums do not match; sort with cache test failed!");
            }
        }
        finally
        {
            SortedCacheCleanup(jobfn);
        }
    }

    Console.WriteLine("[PASSED] - " + string.Join(" ", args));
}
/// <summary>
/// Deletes the DFS files left behind by the SortedCache test: everything whose name starts
/// with <paramref name="jobfn"/>, then everything carrying the test's fixed GUID prefix.
/// </summary>
/// <param name="jobfn">Job file-name prefix; a "*" wildcard is appended to it.</param>
static void SortedCacheCleanup(string jobfn)
{
    string[] deletePatterns = new string[]
    {
        jobfn + "*",
        "{D7D3A6FE-8472-4320-9144-486E436D4542}*"
    };
    for (int i = 0; i < deletePatterns.Length; i++)
    {
        Exec.Shell("Qizmt del " + deletePatterns[i]);
    }
}
/// <summary>
/// Regression test for "Qizmt metaremovemachine". Backs up DFS.xml, formats a three-machine DFS
/// with Replication=2, adds data, then:
///  1. removes the last machine and expects "Qizmt md5 10MB" to be unchanged;
///  2. expects removing a second machine without -f to fail, then forces it with -f;
///  3. expects "Qizmt md5 10MB" to now differ or fail, because parts are missing.
/// The original DFS.xml is always restored in the finally block.
/// </summary>
/// <param name="args">args[1] must be the path of an existing DFS.xml; args[0] is not read here.</param>
/// <exception cref="Exception">
/// Invalid arguments, fewer than 3 machines available, or any of the expectations above is violated.
/// </exception>
static void MetaRemoveMachine(string[] args)
{
    if (args.Length <= 1 || !System.IO.File.Exists(args[1]))
    {
        throw new Exception("Expected path to DFS.xml");
    }

    string dfsxmlpath = args[1];
    string dfsxmlpathbackup = dfsxmlpath + "$" + Guid.NewGuid().ToString();

    string masterdir;
    {
        System.IO.FileInfo fi = new System.IO.FileInfo(dfsxmlpath);
        masterdir = fi.DirectoryName; // Directory's full path.
    }
    Surrogate.SetNewMetaLocation(masterdir);

    dfs olddfs = dfs.ReadDfsConfig_unlocked(dfsxmlpath);

    string sreplication = "2";

    // Machine list: local host first, then every slave that does not resolve to the local host.
    string masterhost = System.Net.Dns.GetHostName();
    string[] allmachines;
    {
        string[] sl = olddfs.Slaves.SlaveList.Split(';');
        List<string> aml = new List<string>(sl.Length + 1);
        aml.Add(masterhost);
        foreach (string slave in sl)
        {
            if (0 != string.Compare(IPAddressUtil.GetName(slave), IPAddressUtil.GetName(masterhost), StringComparison.OrdinalIgnoreCase))
            {
                aml.Add(slave);
            }
        }
        allmachines = aml.ToArray();
    }

    // The test always runs on exactly the first three machines.
    if (allmachines.Length < 3)
    {
        throw new Exception("Need >= 3 machines for this test");
    }
    allmachines = new string[] { allmachines[0], allmachines[1], allmachines[2] };

    Console.WriteLine("Backing up DFS.xml to: {0} ...", dfsxmlpathbackup);
    try
    {
        System.IO.File.Delete(dfsxmlpathbackup);
    }
    catch
    {
    }
    System.IO.File.Move(dfsxmlpath, dfsxmlpathbackup);

    try
    {
        Console.WriteLine("Formatting DFS for test...");
        {
            string fmtcmd = "Qizmt @format Machines=" + string.Join(",", allmachines)
                + " Replication=" + sreplication;
            Console.WriteLine(" {0}", fmtcmd);
            Exec.Shell(fmtcmd);
        }

        Console.WriteLine("Adding some files to DFS...");
        Console.Write(" ");
        Exec.Shell("Qizmt bingen 1MB 1MB 50");
        Console.Write("10%");
        Exec.Shell("Qizmt examples");
        Console.Write("..15%");
        Exec.Shell("Qizmt wordgen 10MB 10MB 100");
        Console.Write("..50%");
        Exec.Shell("Qizmt asciigen 50MB 50MB 500");
        Console.Write("..100%");
        Console.WriteLine();

        // The listing itself is not inspected; the command is still issued as before.
        Exec.Shell("Qizmt ls");

        Console.WriteLine("Ensure the cluster is perfectly healthy...");
        EnsurePerfectQizmtHealtha();

        Console.WriteLine("Run test job, save output...");
        string md5_10MB_output = Exec.Shell("Qizmt md5 10MB");

        {
            string rmachine = allmachines[allmachines.Length - 1];
            Console.WriteLine("Removing machine (metaremovemachine -s {0}) ...", rmachine);
            Console.WriteLine(Exec.Shell("Qizmt metaremovemachine -s " + rmachine));
        }

        Console.WriteLine("Run test job, confirm output...");
        if (md5_10MB_output != Exec.Shell("Qizmt md5 10MB"))
        {
            throw new Exception("Test job output does not match previous run");
        }

        Console.WriteLine("Ensuring meta-removing another machine fails (no replicationphase)");
        {
            bool failed = false;
            string rmachine = allmachines[allmachines.Length - 2];
            try
            {
                Console.WriteLine("Removing machine (metaremovemachine -s {0}) ...", rmachine);
                Console.WriteLine(Exec.Shell("Qizmt metaremovemachine -s " + rmachine));
            }
            catch
            {
                failed = true;
            }
            if (!failed)
            {
                throw new Exception("metaremovemachine -s " + rmachine + " was supposed to fail");
            }
            Console.WriteLine("Forcing meta-removal (metaremovemachine -s -f {0})", rmachine);
            Console.WriteLine(Exec.Shell("Qizmt metaremovemachine -s -f " + rmachine));
        }

        Console.WriteLine("Run test job yet again, confirm output is different, due to missing parts...");
        {
            // Only a failure of the md5 command itself counts as "doesn't match".
            // The comparison is done outside the try so that a matching checksum really
            // fails the test instead of being swallowed by the catch below.
            bool md5threw = false;
            string md5_after_removal = null;
            try
            {
                md5_after_removal = Exec.Shell("Qizmt md5 10MB");
            }
            catch (Exception e)
            {
                md5threw = true;
                Console.WriteLine("Doesn't match due to exception: {0}", e.Message);
            }
            if (!md5threw)
            {
                if (md5_10MB_output == md5_after_removal)
                {
                    throw new Exception("Test job output was not supposed to match previous run, but it matches");
                }
                Console.WriteLine("Doesn't match; as expected");
            }
        }

        Console.WriteLine("[PASSED] - " + string.Join(" ", args));
    }
    finally
    {
        Console.WriteLine("Restoring DFS.xml backup...");
        // Note: these are safe; the try/finally only wraps the new dfs.
        try
        {
            Exec.Shell("Qizmt del *");
        }
        catch
        {
        }
        try
        {
            // Delete temp dfs.xml, it's being replaced with the good one.
            System.IO.File.Delete(dfsxmlpath);
        }
        catch
        {
        }
        System.IO.File.Move(dfsxmlpathbackup, dfsxmlpath);
    }
}
/// <summary>
/// Regression test for a range-sorting output method. Generates word data, imports and runs a
/// two-job source file: a mapreduce job whose reducer appends each key to a per-first-character
/// file under \\host\c$\temp\qizmt\{sortmethod}-{guid}, and a local job that walks the slaves in
/// configured order and checks that no file's first character is below the highest one seen on
/// earlier slaves. The test passes when the job output contains the success marker GUID.
/// Works on the existing DFS and deletes its own data, job and output files afterwards.
/// </summary>
/// <param name="args">
/// args[0]: sort/output method name; written into OutputMethod and used in file names. If it
///          contains "hash" (case-insensitive) KeyMajor is 2, otherwise 8.
/// args[1]: path of an existing DFS.xml.
/// args[2] (optional): "pause" creates a pause file that the verification job waits on until
///          the file is deleted.
/// </param>
/// <exception cref="Exception">Invalid arguments, or the success marker is missing from the job output.</exception>
static void RangeSort(string[] args)
{
    // Validate before touching args[0] so a short argument list yields the intended message
    // instead of an IndexOutOfRangeException.
    if (args.Length <= 1 || !System.IO.File.Exists(args[1]))
    {
        throw new Exception("Expected path to DFS.xml");
    }

    string sortmethod = args[0];
    string dfsxmlpath = args[1];
    // Checking for arg[2]==pause later...

    string masterdir;
    {
        System.IO.FileInfo fi = new System.IO.FileInfo(dfsxmlpath);
        masterdir = fi.DirectoryName; // Directory's full path.
    }
    Surrogate.SetNewMetaLocation(masterdir);

    dfs dc = dfs.ReadDfsConfig_unlocked(dfsxmlpath);

    string masterhost = System.Net.Dns.GetHostName();
    string[] slaves = dc.Slaves.SlaveList.Split(';');
    // NOTE(review): allmachines is not used further in this method. It is kept because
    // building it calls IPAddressUtil.GetName, whose effects are not visible here.
    string[] allmachines;
    {
        List<string> aml = new List<string>(slaves.Length + 1);
        aml.Add(masterhost);
        foreach (string slave in slaves)
        {
            if (0 != string.Compare(IPAddressUtil.GetName(slave), IPAddressUtil.GetName(masterhost), StringComparison.OrdinalIgnoreCase))
            {
                aml.Add(slave);
            }
        }
        allmachines = aml.ToArray();
    }

    string pausefile = "";
    if (args.Length > 2 && "pause" == args[2])
    {
        string pausedir = @"\\" + masterhost + @"\c$\temp\qizmt";
        try
        {
            System.IO.Directory.CreateDirectory(pausedir);
        }
        catch
        {
        }
        pausefile = pausedir + @"\" + sortmethod + @"-pause.txt";
        System.IO.File.WriteAllText(pausefile, "Delete this file to un-pause..." + Environment.NewLine);
        Console.WriteLine();
        Console.WriteLine("Delete the file '{0}' to un-pause...", pausefile);
        Console.WriteLine();
    }

    {
        Console.WriteLine("Ensure cluster is perfectly healthy...");
        EnsurePerfectQizmtHealtha();

        {
            string fguid = "{" + Guid.NewGuid().ToString() + "}";

            // Generate some data to operate on.
            Console.WriteLine("Generating data...");
            // Note: this test depends on wordgen, and wordgen lines always starting with uppercase!
            // 1MB * #processes. The long literal forces 64-bit multiplication so a large
            // process count cannot overflow before the assignment.
            long gensize = 1048576L * dc.Blocks.SortedTotalCount;
            string gencmd = "wordgen";
            int keymajor = 8;
            if (-1 != sortmethod.IndexOf("hash", StringComparison.OrdinalIgnoreCase))
            {
                //gencmd = "bingen"; // Will write crazy files to c:\temp
                keymajor = 2;
            }
            Exec.Shell("Qizmt " + gencmd + " data" + fguid + " " + gensize.ToString());
            try
            {
                string exectempdir = @"\\" + System.Net.Dns.GetHostName() + @"\C$\temp\qizmt\regression_test_" + sortmethod + @"-" + Guid.NewGuid().ToString();
                if (!System.IO.Directory.Exists(exectempdir))
                {
                    System.IO.Directory.CreateDirectory(exectempdir);
                }
                string execfp = exectempdir + @"\exec" + fguid;
                // Note: using c:\temp instead of IOUtils.GetTempDirectory() in the following test
                // because I can't get the IOUtils.GetTempDirectory() for other machines.
                string scguid = Guid.NewGuid().ToString();
                // Backticks stand in for double quotes and are replaced before writing.
                System.IO.File.WriteAllText(execfp, (@"<?xml version=`1.0` encoding=`utf-8`?>
<SourceCode>
  <Jobs>
    <Job Name=`exec" + fguid + @"`>
      <IOSettings>
        <JobType>mapreduce</JobType>
        <KeyLength>100</KeyLength>
        <DFSInput>dfs://data" + fguid + @"</DFSInput>
        <DFSOutput>dfs://output" + fguid + @"</DFSOutput>
        <KeyMajor>" + keymajor.ToString() + @"</KeyMajor>
        <OutputMethod>" + sortmethod + @"</OutputMethod>
        <Setting name=`Subprocess_TotalPrime` value=`0` /> <!-- Don't use grouped. -->
        <Setting name=`Subprocess_SortedTotalCount` value=`" + slaves.Length.ToString() + @"` />
        <!-- ^ One process per participating machine. -->
      </IOSettings>
      <MapReduce>
        <Map>
          <![CDATA[
          public virtual void Map(ByteSlice line, MapOutput output)
          {
              output.Add(line, ByteSlice.Prepare());
          }
        ]]>
        </Map>
        <ReduceInitialize><![CDATA[
          public virtual void ReduceInitialize() { }
        ]]></ReduceInitialize>
        <Reduce>
          <![CDATA[
          string dir = null;
          Dictionary<char, System.IO.StreamWriter> files = new Dictionary<char, System.IO.StreamWriter>();
          public override void Reduce(ByteSlice key, ByteSliceList values, ReduceOutput output)
          {
              if(null == dir)
              {
                  dir = @`\\` + Qizmt_MachineHost + @`\c$\temp\qizmt\" + sortmethod + @"-" + scguid + @"`;
                  if(!System.IO.Directory.Exists(dir))
                  {
                      System.IO.Directory.CreateDirectory(dir);
                  }
              }
              System.IO.StreamWriter stmw;
              if(!files.ContainsKey((char)key[0]))
              {
                  stmw = new System.IO.StreamWriter(dir + @`\` + (char)key[0] + `.txt`, true); // append=true
                  files[(char)key[0]] = stmw;
              }
              stmw = files[(char)key[0]];
              stmw.WriteLine(key.ToString());
          }
        ]]>
        </Reduce>
        <ReduceFinalize><![CDATA[
          public virtual void ReduceFinalize()
          {
              foreach(KeyValuePair<char, System.IO.StreamWriter> kvp in files)
              {
                  kvp.Value.Close();
              }
          }
        ]]></ReduceFinalize>
      </MapReduce>
    </Job>
    <Job Name=`Verify Sort Range` >
      <IOSettings>
        <JobType>local</JobType>
      </IOSettings>
      <Local>
        <![CDATA[
          readonly string[] slaves = `" + dc.Slaves.SlaveList + @"`.Split(';');
          readonly string pausefile = @`" + pausefile + @"`;
          public virtual void Local()
          {
              if(!string.IsNullOrEmpty(pausefile))
              {
                  bool bb = false;
                  while(System.IO.File.Exists(pausefile))
                  {
                      if(!bb)
                      {
                          bb = true;
                          try
                          {
                              System.IO.File.AppendAllText(pausefile, `Ready!` + Environment.NewLine);
                          }
                          catch
                          {
                          }
                      }
                      System.Threading.Thread.Sleep(1000);
                  }
              }
              bool failed = false;
              char bound = '\0';
              foreach(string slave in slaves)
              {
                  string dir = @`\\` + slave + @`\c$\temp\qizmt\" + sortmethod + @"-" + scguid + @"`;
                  try
                  {
                      char thishighest = '\0';
                      foreach(System.IO.FileInfo fi in (new System.IO.DirectoryInfo(dir).GetFiles()))
                      {
                          char c = fi.Name[0];
                          if(c < bound)
                          {
                              failed = true;
                              throw new Exception(`Data is not range sorted (" + sortmethod + @") starting on machine ` + slave + ` (Error 1FC8AB58-4DBD-4d56-9587-96312F9A5886)`);
                          }
                          if(c > thishighest)
                          {
                              thishighest = c;
                          }
                          fi.Delete();
                      }
                      if(thishighest > bound)
                      {
                          bound = thishighest;
                      }
                      System.IO.Directory.Delete(dir);
                  }
                  catch(Exception e)
                  {
                      Qizmt_Log(`Exception: ` + e.ToString());
                  }
              }
              if(!failed && bound > '\0')
              {
                  Qizmt_Log(`Success! (OK 55D106EA-AD09-4503-96BA-387795EDEECB)`);
              }
          }
        ]]>
      </Local>
    </Job>
  </Jobs>
</SourceCode>
").Replace('`', '"'));
                Exec.Shell("Qizmt importdir " + exectempdir);
                try
                {
                    System.IO.File.Delete(execfp);
                    System.IO.Directory.Delete(exectempdir);
                }
                catch
                {
                    // Best effort: leftover temp files do not affect the test.
                }

                try
                {
                    Console.WriteLine("Running " + sortmethod + " job...");
                    string output = Exec.Shell("Qizmt exec exec" + fguid);
                    Console.WriteLine(output.Trim());
                    // The verification job logs this marker only when the range order held.
                    if (-1 == output.IndexOf("55D106EA-AD09-4503-96BA-387795EDEECB"))
                    {
                        throw new Exception("Sort range order verification (" + sortmethod + ") did not succeed");
                    }
                }
                finally
                {
                    Exec.Shell("Qizmt del output" + fguid);
                    Exec.Shell("Qizmt del exec" + fguid);
                }
            }
            finally
            {
                Exec.Shell("Qizmt del data" + fguid);
            }
        }

        Console.WriteLine("[PASSED] - " + string.Join(" ", args));
    }
}
/// <summary>
/// Regression test for "aelight deploy". Runs a sorted mapreduce job and records its md5, plants
/// a dummy "temp_xxxxxxxx-....dll" file on the surrogate, deletes the slave executable on the
/// last machine, deploys, then verifies that neither the dummy file nor any temp_*.dll was
/// deployed to that machine and that re-running the job yields the same md5.
/// Works on the existing DFS and deletes its own job, data and output files afterwards.
/// </summary>
/// <param name="args">args[1] must be the path of an existing DFS.xml; args[0] is not read here.</param>
/// <exception cref="Exception">
/// Invalid arguments, leak-detector preparation failed, deploy failed, a leaked file was
/// deployed, or the job output changed across the deploy.
/// </exception>
static void Deploy(string[] args)
{
    if (args.Length <= 1 || !System.IO.File.Exists(args[1]))
    {
        throw new Exception("Expected path to DFS.xml");
    }

    string dfsxmlpath = args[1];

    string masterdir;
    {
        System.IO.FileInfo fi = new System.IO.FileInfo(dfsxmlpath);
        masterdir = fi.DirectoryName; // Directory's full path.
    }
    Surrogate.SetNewMetaLocation(masterdir);

    dfs dc = dfs.ReadDfsConfig_unlocked(dfsxmlpath);

    // Machine list: local host first, then every slave that does not resolve to the local host.
    string masterhost = System.Net.Dns.GetHostName();
    string[] allmachines;
    {
        string[] sl = dc.Slaves.SlaveList.Split(';');
        List<string> aml = new List<string>(sl.Length + 1);
        aml.Add(masterhost);
        foreach (string slave in sl)
        {
            if (0 != string.Compare(IPAddressUtil.GetName(slave), IPAddressUtil.GetName(masterhost), StringComparison.OrdinalIgnoreCase))
            {
                aml.Add(slave);
            }
        }
        allmachines = aml.ToArray();
    }

    {
        Console.WriteLine("Ensure cluster is perfectly healthy...");
        EnsurePerfectQizmtHealtha();

        // Run a job...
        string exec_md5;
        {
            // Generate some data to operate on.
            Exec.Shell("Qizmt gen data{AE7E8F7E-AE48-40e7-B5B2-7E07E39B46F9} " + 1048576.ToString());

            string exectempdir = @"\\" + System.Net.Dns.GetHostName() + @"\C$\temp\qizmt\regression_test_Deploy-" + Guid.NewGuid().ToString();
            if (!System.IO.Directory.Exists(exectempdir))
            {
                System.IO.Directory.CreateDirectory(exectempdir);
            }
            string execfp = exectempdir + @"\exec{07E2B469-80F9-4776-908F-E504A906E3B6}";
            // Backticks stand in for double quotes and are replaced before writing.
            System.IO.File.WriteAllText(execfp, (@"<?xml version=`1.0` encoding=`utf-8`?>
<SourceCode>
  <Jobs>
    <Job Name=`exec{07E2B469-80F9-4776-908F-E504A906E3B6}`>
      <IOSettings>
        <JobType>mapreduce</JobType>
        <KeyLength>100</KeyLength>
        <DFSInput>dfs://data{*}</DFSInput>
        <DFSOutput>dfs://output{A785E7D1-9017-45fe-9E07-57695192A5DC}</DFSOutput>
        <KeyMajor>8</KeyMajor>
        <OutputMethod>sorted</OutputMethod>
      </IOSettings>
      <MapReduce>
        <Map>
          <![CDATA[
          public virtual void Map(ByteSlice line, MapOutput output)
          {
              output.Add(line, ByteSlice.Prepare());
          }
        ]]>
        </Map>
        <Reduce>
          <![CDATA[
          public override void Reduce(ByteSlice key, ByteSliceList values, ReduceOutput output)
          {
              while(values.MoveNext())
              {
                  output.Add(key);
              }
          }
        ]]>
        </Reduce>
      </MapReduce>
    </Job>
  </Jobs>
</SourceCode>
").Replace('`', '"'));
            Exec.Shell("Qizmt importdir " + exectempdir);
            try
            {
                System.IO.File.Delete(execfp);
                System.IO.Directory.Delete(exectempdir);
            }
            catch
            {
                // Best effort: leftover temp files do not affect the test.
            }

            Exec.Shell("Qizmt exec exec{07E2B469-80F9-4776-908F-E504A906E3B6}");
            exec_md5 = DfsSum("md5", "output{A785E7D1-9017-45fe-9E07-57695192A5DC}");
            Exec.Shell("Qizmt del output{A785E7D1-9017-45fe-9E07-57695192A5DC}");
        }

        try
        {
            const string TEMP_DLLS_PATTERN = "temp_????????-????-????-????-????????????.dll";

            // Prepare to detect leaked DLLs:
            string lmachine = allmachines[allmachines.Length - 1];
            // The dummy name matches TEMP_DLLS_PATTERN, so it looks like a leaked temp dll.
            string[] dummyleaknames = new string[]
            {
                TEMP_DLLS_PATTERN.Replace('?', 'x'),
            };
            try
            {
                // Delete leaked DLLs on lmachine...
                foreach (string fn in System.IO.Directory.GetFiles(Surrogate.NetworkPathForHost(lmachine), TEMP_DLLS_PATTERN))
                {
                    System.IO.File.Delete(fn);
                }

                // Delete planted files from lmachine...
                {
                    string host = lmachine;
                    string netdir = Surrogate.NetworkPathForHost(host);
                    foreach (string dummyleakname in dummyleaknames)
                    {
                        try
                        {
                            System.IO.File.Delete(netdir + @"\" + dummyleakname);
                        }
                        catch
                        {
                        }
                    }
                }

                // Plant some new leaked files on surrogate...
                foreach (string dummyleakname in dummyleaknames)
                {
                    System.IO.File.WriteAllText(masterdir + @"\" + dummyleakname, "Dummy file for deploy leak detector" + Environment.NewLine);
                }
            }
            catch (Exception e)
            {
                // NOTE(review): this assignment has no observable effect because the throw
                // below leaves the enclosing try before lmachine is read again.
                lmachine = null;
                throw new Exception("Failed to prepare for deploy leak detector", e);
            }

            {
                Console.WriteLine("Deleting critical files across cluster to ensure deploy will succeed...");
                int nfailed = 0;
                string failreason = "";
                if (allmachines.Length > 1) // Important; can't delete slave.exe on surrogate or it can't deploy it.
                {
                    string host = allmachines[allmachines.Length - 1];
                    try
                    {
                        string netdir = Surrogate.NetworkPathForHost(host);
                        System.IO.File.Delete(netdir + @"\MySpace.DataMining.DistributedObjects.DistributedObjectsSlave.exe");
                    }
                    catch (Exception fe)
                    {
                        nfailed++;
                        failreason = fe.ToString();
                    }
                }
                if (nfailed > 0)
                {
                    // Fix: the count and the reason each get their own placeholder. Previously
                    // both slots were {0} and only failreason was passed, so the count was lost.
                    Console.WriteLine("Warning: {0} files failed to be deleted; {1}", nfailed, failreason);
                }
            }

            try
            {
                Console.WriteLine("Deploying...");
                Exec.Shell("aelight deploy");
                System.Threading.Thread.Sleep(1000 * 5); // Wait a bit for the services to come back up.
            }
            catch (Exception e)
            {
                Console.Error.WriteLine(e.ToString());
                Console.Error.WriteLine(" WARNING: cluster may be in a bad state; may need to reinstall");
                throw;
            }

            Console.WriteLine("Ensuring deploy succeeded...");
            Console.WriteLine("(Note: if this hangs indefinitely, deploy failed and need to reinstall)");

            if (lmachine != null)
            {
                // The planted dummy must not have been copied to lmachine.
                {
                    string host = lmachine;
                    string netdir = Surrogate.NetworkPathForHost(host);
                    foreach (string dummyleakname in dummyleaknames)
                    {
                        string fp = netdir + @"\" + dummyleakname;
                        if (System.IO.File.Exists(fp))
                        {
                            throw new Exception("Deployed dummy/leaked file: " + fp);
                        }
                    }
                }

                // No temp_*.dll of any kind may have been copied to lmachine.
                {
                    string[] leaks = System.IO.Directory.GetFiles(Surrogate.NetworkPathForHost(lmachine), TEMP_DLLS_PATTERN);
                    if (leaks.Length > 0)
                    {
                        throw new Exception("Deployed leaked dll: " + leaks[0] + " (" + leaks.Length.ToString() + " in total)");
                    }
                }

                // Delete the planted dummy files from surrogate!
                foreach (string dummyleakname in dummyleaknames)
                {
                    System.IO.File.Delete(masterdir + @"\" + dummyleakname);
                }
            }

            Console.WriteLine("Ensure cluster is perfectly healthy...");
            EnsurePerfectQizmtHealtha();

            // Re-run job, confirm good...
            {
                Exec.Shell("Qizmt exec exec{07E2B469-80F9-4776-908F-E504A906E3B6}");
                string new_exec_md5 = DfsSum("md5", "output{A785E7D1-9017-45fe-9E07-57695192A5DC}");
                Exec.Shell("Qizmt del output{A785E7D1-9017-45fe-9E07-57695192A5DC}");
                if (new_exec_md5 != exec_md5)
                {
                    throw new Exception("Output files from before and after deploy do not match");
                }
            }
        }
        finally
        {
            try
            {
                Console.WriteLine("Cleaning temporary test data...");
                Exec.Shell("Qizmt del exec{07E2B469-80F9-4776-908F-E504A906E3B6}");
                Exec.Shell("Qizmt del data{AE7E8F7E-AE48-40e7-B5B2-7E07E39B46F9}");
                Exec.Shell("Qizmt del output{A785E7D1-9017-45fe-9E07-57695192A5DC}");
            }
            catch
            {
                // Best effort: cleanup failures must not hide the test result.
            }
        }

        Console.WriteLine("[PASSED] - " + string.Join(" ", args));
    }
}
/// <summary>
/// Regression test: formats a temporary DFS over the machines listed in the
/// existing DFS.xml, generates data, raises the replication factor to 2 and
/// then verifies that an absurdly high replication factor is rejected.
/// The original DFS.xml is moved aside first and moved back in the finally
/// block.
/// </summary>
/// <param name="args">
/// args[1] must be the path of an existing DFS.xml file; the whole array is
/// echoed in the "[PASSED]" message.
/// </param>
static void ReplicationChecks(string[] args)
{
    if (args.Length <= 1 || !System.IO.File.Exists(args[1]))
    {
        throw new Exception("Expected path to DFS.xml");
    }

    string dfsxmlpath = args[1];
    string dfsxmlpathbackup = dfsxmlpath + "$" + Guid.NewGuid().ToString();

    string masterdir;
    {
        System.IO.FileInfo fi = new System.IO.FileInfo(dfsxmlpath);
        masterdir = fi.DirectoryName; // Directory's full path.
    }
    Surrogate.SetNewMetaLocation(masterdir);

    dfs olddfs = dfs.ReadDfsConfig_unlocked(dfsxmlpath);

    // Build the machine list: this host first, then every configured slave
    // whose name does not resolve to this host (avoids listing it twice).
    string masterhost = System.Net.Dns.GetHostName();
    string[] allmachines;
    {
        string[] sl = olddfs.Slaves.SlaveList.Split(';');
        List<string> aml = new List<string>(sl.Length + 1);
        aml.Add(masterhost);
        foreach (string slave in sl)
        {
            if (0 != string.Compare(IPAddressUtil.GetName(slave), IPAddressUtil.GetName(masterhost), StringComparison.OrdinalIgnoreCase))
            {
                aml.Add(slave);
            }
        }
        allmachines = aml.ToArray();
    }

    Console.WriteLine("Backing up DFS.xml to: {0} ...", dfsxmlpathbackup);
    try
    {
        System.IO.File.Delete(dfsxmlpathbackup);
    }
    catch
    {
    }
    System.IO.File.Move(dfsxmlpath, dfsxmlpathbackup);

    try
    {
        {
            Console.WriteLine("Formatting DFS for test...");
            Exec.Shell("Qizmt @format Machines=" + string.Join(",", allmachines));
        }

        {
            // Test logic:

            {
                // 4194304 bytes per machine, split 1/4 + 1/4 + 1/2 over three files.
                long XBYTES = (long)4194304 * (long)allmachines.Length;
                Console.WriteLine("Generating data...");
                Console.Write(" ");
                Exec.Shell("Qizmt gen data{476D6FE8-D645-41cc-83A1-3AB5E2DE23E7} " + (XBYTES / 4).ToString());
                Console.Write("25%");
                Exec.Shell("Qizmt gen data{61136275-16EC-4ff9-84CE-ACC967550181} " + (XBYTES / 4).ToString());
                Console.Write("..50%");
                Exec.Shell("Qizmt gen data{C76F6C06-EFC8-4808-B214-DB4D167171EB} " + (XBYTES / 2).ToString());
                Console.Write("..100%");
                Console.WriteLine();
            }

            Console.WriteLine("Ensure the cluster is perfectly healthy...");
            EnsurePerfectQizmtHealtha();

            Console.WriteLine("Raising replication factor to 2...");
            Exec.Shell("Qizmt replicationupdate 2");

            {
                Console.WriteLine("Raising replication factor too high (ensure fail)...");
                bool ok = false;
                bool timedout = false;
                System.Threading.Thread thd = new System.Threading.Thread(
                    new System.Threading.ThreadStart(
                    delegate
                    {
                        try
                        {
                            Exec.Shell("Qizmt replicationupdate 999999999");
                        }
                        catch (System.Threading.ThreadAbortException)
                        {
                            // Raised by the timeout Abort() below. This is not the
                            // failure under test, so it must not set ok.
                        }
                        catch (Exception e)
                        {
                            ok = true;
                            Console.WriteLine("Got exception as expected: {0}", e.Message);
                        }
                    }));
                thd.Start();
                // Run on a separate thread so a hanging command can be given up on
                // after 10 seconds instead of blocking the whole test run.
                if (!thd.Join(1000 * 10))
                {
                    timedout = true;
                    thd.Abort();
                }
                if (timedout)
                {
                    // Decided on this thread only, so the outcome does not depend on
                    // how (or whether) the aborted worker unwinds.
                    throw new Exception("Test failed: expected exception, but the command did not finish within 10 seconds");
                }
                if (!ok)
                {
                    throw new Exception("Test failed: expected exception");
                }
            }
        }

        Console.WriteLine("[PASSED] - " + string.Join(" ", args));
    }
    finally
    {
        Console.WriteLine("Restoring DFS.xml backup...");
        // Note: these are safe; the try/finally only wraps the new dfs.
        try
        {
            Exec.Shell("Qizmt del *");
        }
        catch
        {
        }
        try
        {
            // Delete temp dfs.xml, it's being replaced with the good one.
            System.IO.File.Delete(dfsxmlpath);
        }
        catch
        {
        }
        System.IO.File.Move(dfsxmlpathbackup, dfsxmlpath);
    }
}
/// <summary>
/// Runs a fixed sequence of Qizmt commands (each announced with an "RT: ..."
/// line) and throws as soon as one of the built-in checks fails. Local
/// scratch directories created along the way are removed even when a step
/// throws.
/// </summary>
/// <param name="allmachines">
/// Machines of the cluster under test. Its length scales the generated data
/// size, and its last entry is the machine used for removemachine,
/// addmachine and the per-host info query. Must contain at least one entry.
/// </param>
static void _ReplicationRunAllCommands(string[] allmachines)
{
    if (allmachines == null || allmachines.Length == 0)
    {
        // Fail before touching the cluster; the per-host steps below index the last entry.
        throw new ArgumentException("Expected at least one machine", "allmachines");
    }

    //long XBYTES = (long)67108864 * (long)allmachines.Length;
    long XBYTES = (long)4194304 * (long)allmachines.Length;
    string lastmachine = allmachines[allmachines.Length - 1];

    {
        Console.WriteLine("RT: wordgen");
        Console.Write(Exec.Shell("Qizmt wordgen wordgen{92D6883B-D950-4beb-8281-8599F352EAF7} " + XBYTES.ToString()));
    }

    if (allmachines.Length <= 2)
    {
        Console.WriteLine(" *** Skipping removemachine and addmachine: not enough machines ***");
    }
    else
    {
        {
            Console.WriteLine("RT: removemachine");
            Console.Write(Exec.Shell("Qizmt removemachine " + lastmachine));
        }

        {
            Console.WriteLine("RT: addmachine");
            Console.Write(Exec.Shell("Qizmt addmachine " + lastmachine));
        }
    }

    {
        Console.WriteLine("RT: asciigen");
        Console.Write(Exec.Shell("Qizmt asciigen asciigen{161C30D0-2810-4861-B517-AAFB207B8887} " + XBYTES.ToString()));
    }

    {
        Console.WriteLine("RT: bingen");
        Console.Write(Exec.Shell("Qizmt bingen bingen{E59A0DA4-9D69-4e62-A03B-4085B75956D0} " + XBYTES.ToString()));
    }

    {
        Console.WriteLine("RT: combine");
        Console.Write(Exec.Shell("Qizmt wordgen combine1part{CB302F9E-6291-4e60-8D95-EA816A116912} 1MB"));
        Console.Write(Exec.Shell("Qizmt wordgen combine2part{D035B44D-210C-4969-B8DE-3E8FDEE0FA32} 1MB"));
        Console.Write(Exec.Shell("Qizmt combine combine1part{CB302F9E-6291-4e60-8D95-EA816A116912} combine2part{D035B44D-210C-4969-B8DE-3E8FDEE0FA32} + combine{8FAD1E43-0F6F-4d81-9D96-22D095994374}"));
    }

    {
        Console.WriteLine("RT: info");
        Console.Write(Exec.Shell("Qizmt info"));
        Console.Write(Exec.Shell("Qizmt info wordgen{92D6883B-D950-4beb-8281-8599F352EAF7}"));
        Console.Write(Exec.Shell("Qizmt info wordgen{92D6883B-D950-4beb-8281-8599F352EAF7}:" + lastmachine));
    }

    // Ensure temp dir exists.
    string tempdir = @"\\" + System.Net.Dns.GetHostName() + @"\c$\temp\qizmt" + Guid.NewGuid().ToString();
    if (!System.IO.Directory.Exists(tempdir))
    {
        System.IO.Directory.CreateDirectory(tempdir);
    }

    try
    {
        {
            Console.WriteLine("RT: put");
            string fsfn = "put{6619C2FF-1816-4adc-8EE9-12F84F290DCC}";
            string fsfile = tempdir + @"\" + fsfn;
            System.IO.File.WriteAllText(fsfile, "Replication test for Qizmt" + Environment.NewLine + "{C3CBD581-58C7-42bb-B5AE-70ABAD806B65}" + Environment.NewLine);
            Console.Write(Exec.Shell("Qizmt put " + fsfile + " " + fsfn));
            System.IO.File.Delete(fsfile);
        }

        {
            Console.WriteLine("RT: putbinary");
            string fsfn = "putbinary{B581025F-9695-4433-804B-CA9E05D80E3D}";
            string fsfile = tempdir + @"\" + fsfn;
            System.IO.File.WriteAllText(fsfile, "Replication test for Qizmt" + Environment.NewLine + "{E8406AF3-5482-4428-A973-46FF9E6D3F6A}" + Environment.NewLine);
            Console.Write(Exec.Shell("Qizmt putbinary " + fsfile + " " + fsfn));
            System.IO.File.Delete(fsfile);
        }

        {
            Console.WriteLine("RT: head");
            string output = Exec.Shell("Qizmt head put{6619C2FF-1816-4adc-8EE9-12F84F290DCC} 1");
            Console.Write(output);
            // Trim first so a trailing newline does not count as a second line.
            string oneline = output.Trim();
            string[] lines = oneline.Split('\n');
            if (1 != lines.Length)
            {
                throw new Exception("Expected one line from head <file> 1");
            }
        }

        {
            Console.WriteLine("RT: get");
            string fsfn = "get{F6179A4C-8FC1-41f1-895E-05B23023F01A}";
            string fsfile = tempdir + @"\" + fsfn;
            Console.Write(Exec.Shell("Qizmt get put{6619C2FF-1816-4adc-8EE9-12F84F290DCC} " + fsfile));
            // The marker GUID was written into the file by the "put" step above.
            if (-1 == System.IO.File.ReadAllText(fsfile).IndexOf("{C3CBD581-58C7-42bb-B5AE-70ABAD806B65}"))
            {
                throw new Exception("Qizmt get failure");
            }
            System.IO.File.Delete(fsfile);
        }

        {
            Console.WriteLine("RT: getbinary");
            string fsfile = tempdir + @"\putbinary{B581025F-9695-4433-804B-CA9E05D80E3D}";
            Console.Write(Exec.Shell("Qizmt getbinary putbinary{B581025F-9695-4433-804B-CA9E05D80E3D} " + tempdir));
            // The marker GUID was written into the file by the "putbinary" step above.
            if (-1 == System.IO.File.ReadAllText(fsfile).IndexOf("{E8406AF3-5482-4428-A973-46FF9E6D3F6A}"))
            {
                throw new Exception("Qizmt getbinary failure");
            }
            System.IO.File.Delete(fsfile);
        }

        {
            Console.WriteLine("RT: rename");
            Console.Write(Exec.Shell("Qizmt rename asciigen{161C30D0-2810-4861-B517-AAFB207B8887} rename{0AD29C98-D5F9-4501-B5EA-735BFDD119A7}"));
            // NOTE: asciigen file is gone now!
        }

        {
            Console.WriteLine("RT: exec");
            string cachertempdir = @"\\" + System.Net.Dns.GetHostName() + @"\C$\temp\qizmt\regression_test_Cacher-" + Guid.NewGuid().ToString();
            if (!System.IO.Directory.Exists(cachertempdir))
            {
                System.IO.Directory.CreateDirectory(cachertempdir);
            }
            string cacherfp = cachertempdir + @"\exec{B9C92E20-9B5F-47a1-8081-B07560EE606E}";
            try
            {
                // Backticks stand in for double quotes inside the verbatim literal
                // and are swapped back before the job file is written.
                System.IO.File.WriteAllText(cacherfp, (@"<?xml version=`1.0` encoding=`utf-8`?> <SourceCode> <Jobs> <Job Name=`Cacher`> <Delta> <Name>cacheB9DE42AC-2823-4ad9-9101-54DE52BDCECE</Name> <DFSInput>dfs://wordgen{92D6883B-D950-4beb-8281-8599F352EAF7}</DFSInput> </Delta> <IOSettings> <JobType>mapreduce</JobType> <KeyLength>100</KeyLength> <DFSInput></DFSInput> <DFSOutput>dfs://exec-output{5E745D71-DF26-4332-A023-69DF5D99502B}</DFSOutput> <OutputMethod>sorted</OutputMethod> </IOSettings> <MapReduce> <Map> <![CDATA[ public virtual void Map(ByteSlice line, MapOutput output) { output.Add(line, ByteSlice.Prepare()); } ]]> </Map> <Reduce> <![CDATA[ public override void Reduce(ByteSlice key, RandomAccessEntries values, RandomAccessOutput output) { for(int i = 0; i < values.Length; i++) { output.Add(key); } } ]]> </Reduce> </MapReduce> </Job> </Jobs> </SourceCode> ").Replace('`', '"'));
                Exec.Shell("Qizmt importdir " + cachertempdir);
            }
            finally
            {
                // Best-effort removal of the import directory, also when the
                // write or the import itself failed.
                try
                {
                    System.IO.File.Delete(cacherfp);
                    System.IO.Directory.Delete(cachertempdir);
                }
                catch
                {
                }
            }
            Console.Write(Exec.Shell("Qizmt exec exec{B9C92E20-9B5F-47a1-8081-B07560EE606E}"));
        }

        {
            Console.WriteLine("RT: del");
            Console.Write(Exec.Shell("Qizmt del rename{0AD29C98-D5F9-4501-B5EA-735BFDD119A7}")); // Data
            Console.Write(Exec.Shell("Qizmt del cacheB9DE42AC-2823-4ad9-9101-54DE52BDCECE")); // DeltaCache
            Console.Write(Exec.Shell("Qizmt del exec{B9C92E20-9B5F-47a1-8081-B07560EE606E}")); // Job
        }

        {
            Console.WriteLine("RT: sorted");
            if (-1 == Exec.Shell("Qizmt sorted wordgen{92D6883B-D950-4beb-8281-8599F352EAF7}").IndexOf("Not sorted"))
            {
                throw new Exception("Expected unsorted content: wordgen{92D6883B-D950-4beb-8281-8599F352EAF7}");
            }
            if (-1 == Exec.Shell("Qizmt sorted exec-output{5E745D71-DF26-4332-A023-69DF5D99502B}").IndexOf("Sorted"))
            {
                throw new Exception("Expected sorted content: exec-output{5E745D71-DF26-4332-A023-69DF5D99502B}");
            }
        }

        {
            Console.WriteLine("RT: md5");
            if (32 != DfsSum("md5", "exec-output{5E745D71-DF26-4332-A023-69DF5D99502B}").Length)
            {
                throw new Exception("Expected 32 bytes of MD5 hex string");
            }
        }

        {
            Console.WriteLine("RT: checksum");
            // The exec job's input and its sorted output are expected to sum to the same value.
            if (DfsSum("wordgen{92D6883B-D950-4beb-8281-8599F352EAF7}") != DfsSum("exec-output{5E745D71-DF26-4332-A023-69DF5D99502B}"))
            {
                throw new Exception("Checksum of input file (unsorted) does not match checksum of output file (sorted) for exec job");
            }
        }

        {
            Console.WriteLine("RT: ls");
            Console.Write(Exec.Shell("Qizmt ls"));
        }

        {
            Console.WriteLine("RT: health");
            EnsurePerfectQizmtHealtha();
        }
    }
    finally
    {
        // Best-effort cleanup that also runs when a step above throws. The
        // directory is a GUID-named scratch location created by this method,
        // so removing it recursively only discards leftovers of a failed step.
        try
        {
            System.IO.Directory.Delete(tempdir, true);
        }
        catch
        {
        }
    }
}