public static void Main(string[] args) {
  if (args.Length != 3) {
    Console.Error.WriteLine("Usage: SHS.Builder <leader> <linkfile.bin.gz> <friendly-name>");
    Environment.Exit(1);
  }
  var sw = Stopwatch.StartNew();
  var service = new Service(args[0]);
  var numServers = service.NumServers();
  Console.WriteLine("SHS service is currently running on {0} servers", numServers);
  var store = service.CreateStore(numServers - 1, 2, args[2]);
  var plEnum = new PageLinksFileEnumerator(args[1]);
  store.AddPageLinks(plEnum);
  store.Seal();
  Console.Error.WriteLine("Done. Building store {0:N} took {1} seconds.", store.ID, 0.001 * sw.ElapsedMilliseconds);
}
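// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the builder above): once a store has been
// built and sealed, it can be queried with the same batched calls the
// regression tests below use (BatchedUrlToUid, BatchedGetLinks,
// BatchedUidToUrl). The OpenStore(Guid) call and the command-line shape are
// assumptions made for this sketch; only the batched query calls are
// confirmed by the surrounding code.
// ---------------------------------------------------------------------------
using System;
using SHS;  // assumed namespace of Service, Store, and Dir

public class LinkLister {
  public static void Main(string[] args) {
    if (args.Length != 3) {
      Console.Error.WriteLine("Usage: SHS.LinkLister <leader> <store-guid> <url>");
      Environment.Exit(1);
    }
    var service = new Service(args[0]);
    var store = service.OpenStore(Guid.Parse(args[1]));   // assumed API; CreateStore returns the GUID printed by the builder
    var uids = store.BatchedUrlToUid(new[] { args[2] });  // map the URL to its UID
    var linkUids = store.BatchedGetLinks(uids, Dir.Fwd);  // outlinks of that UID
    foreach (var url in store.BatchedUidToUrl(linkUids[0])) {
      Console.WriteLine(url);
    }
  }
}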
static void Main(string[] args) {
  if (args.Length != 4) {
    Console.Error.WriteLine("Usage: SHS.RegressionTest3 <servers.txt> <numPages> <numIterations> <numValThreads>");
  } else {
    int numPages = int.Parse(args[1]);
    int numIters = int.Parse(args[2]);
    int numValidators = int.Parse(args[3]);
    var sw = Stopwatch.StartNew();
    var svc = new Service(args[0]);
    Console.WriteLine("Service currently provides {0} servers", svc.NumServers());
    var store = svc.CreateStore();
    Console.WriteLine("Created new store with GUID {0:N}", store.ID);

    // Create a validation graph with "numPages" vertices and (for now) no edges
    var rand = new Random(123456);
    var pages = new string[numPages];
    var fwds = new List<int>[numPages];
    var bwds = new List<int>[numPages];
    for (int i = 0; i < pages.Length; i++) {
      var r = rand.Next();
      pages[i] = string.Format("http://www.{0:D2}.com/{1}", rand.Next(1, 100), RandomString(rand));
      fwds[i] = new List<int>();
      bwds[i] = new List<int>();
    }
    Array.Sort(pages);
    Console.WriteLine("Synthesized {0} URLs ({1} duplicates)", pages.Length, pages.Length - pages.Distinct().Count());

    for (int iter = 0; iter < numIters; iter++) {
      var batchSize = rand.Next(10, 50);
      var plBatch = new List<PL>();
      long addCtr = 0;
      long delCtr = 0;
      while (plBatch.Count < batchSize) {
        // Pick a page
        var src = rand.Next(0, pages.Length);
        if (plBatch.Exists(x => x.src == src)) continue;
        var list = fwds[src];
        foreach (var dst in list) {
          bwds[dst].Remove(src);
        }
        if (list.Count == 0) {
          // If this page has no links, create between 20 and 50 links, with bias towards the "neighborhood"
          var numAdds = rand.Next(20, 51);
          while (numAdds > 0) {
            var dst = (int)RandomNormal(rand, src, 100);
            if (dst >= 0 && dst < pages.Length && !list.Contains(dst) && dst != src) {
              list.Add(dst);
              addCtr++;
              numAdds--;
            }
          }
        } else {
          // Otherwise, choose about half of the links to delete, and add about the same number of new links
          var dels = list.Where(x => rand.Next(0, 2) == 0).ToList();
          delCtr += dels.Count;
          var numAdds = rand.Next(dels.Count - 3, dels.Count + 4);
          while (numAdds > 0) {
            var dst = (int)RandomNormal(rand, src, 100);
            if (dst >= 0 && dst < pages.Length && !list.Contains(dst) && dst != src) {
              list.Add(dst);
              addCtr++;
              numAdds--;
            }
          }
          list = list.Except(dels).ToList();
        }
        foreach (var dst in list) {
          bwds[dst].Add(src);
        }
        fwds[src] = list;
        plBatch.Add(new PL { src = src, dsts = CloneList(list) });
      }
      var pageLinksBatch = plBatch.Select(x => new PageLinks { pageUrl = pages[x.src], linkUrls = x.dsts.Select(y => pages[y]).ToArray() }).ToList();
      int epoch = store.AddPageLinks(pageLinksBatch.GetEnumerator());
      //store.MarkAtomic();
      var snapFwds = CloneLists(fwds);
      var snapBwds = CloneLists(bwds);
      for (int i = 0; i < numValidators; i++) {
        var vs = new ValidatorState(args[0], store.ID, epoch, plBatch, pages, snapFwds, snapBwds);
        new Thread(vs.DoWork).Start();
      }
      var srcUrls = plBatch.Select(x => pages[x.src]).ToArray();
      var srcUids = store.BatchedUrlToUid(srcUrls, ref epoch);
      var fwdLinkUids = store.BatchedGetLinks(srcUids, Dir.Fwd, ref epoch);
      for (int i = 0; i < fwdLinkUids.Length; i++) {
        var fwdLinkUrlsR = store.BatchedUidToUrl(fwdLinkUids[i], ref epoch);
        var fwdLinkUrlsL = fwds[plBatch[i].src].Select(x => pages[x]).ToArray();
        if (!SameSets(fwdLinkUrlsR, fwdLinkUrlsL)) {
          lock (Console.Out) {
            Console.WriteLine("Detected inconsistency!");
            Console.WriteLine("  srcURL[{0}]={1}", i, srcUrls[i]);
            Console.WriteLine("{0} fwd link URLs according to SHS", fwdLinkUrlsR.Length);
            for (int k = 0; k < fwdLinkUrlsR.Length; k++) {
              Console.WriteLine("  fwdLinkUrlsR[{0}]={1}", k, fwdLinkUrlsR[k]);
            }
            Console.WriteLine("{0} fwd link URLs according to local state", fwdLinkUrlsL.Length);
            for (int k = 0; k < fwdLinkUrlsL.Length; k++) {
              Console.WriteLine("  fwdLinkUrlsL[{0}]={1}", k, fwdLinkUrlsL[k]);
            }
          }
          throw new Exception();
        }
        var bwdLinkUids = store.BatchedGetLinks(fwdLinkUids[i], Dir.Bwd, ref epoch);
        for (int j = 0; j < bwdLinkUids.Length; j++) {
          var bwdLinkUrlsR = store.BatchedUidToUrl(bwdLinkUids[j], ref epoch);
          var bwdLinkUrlsL = bwds[Idx(fwdLinkUrlsR[j], pages, plBatch[i].dsts)].Select(x => pages[x]).ToArray();
          if (!SameSets(bwdLinkUrlsR, bwdLinkUrlsL)) {
            lock (Console.Out) {
              Console.WriteLine("Detected inconsistency!");
              Console.WriteLine("  srcURL[{0}]={1}", i, srcUrls[i]);
              Console.WriteLine("  dstURL[{0}]={1}", j, fwdLinkUrlsR[j]);
              Console.WriteLine("{0} bwd link URLs according to SHS", bwdLinkUrlsR.Length);
              for (int k = 0; k < bwdLinkUrlsR.Length; k++) {
                Console.WriteLine("  bwdLinkUrlsR[{0}]={1}", k, bwdLinkUrlsR[k]);
              }
              Console.WriteLine("{0} bwd link URLs according to local state", bwdLinkUrlsL.Length);
              for (int k = 0; k < bwdLinkUrlsL.Length; k++) {
                Console.WriteLine("  bwdLinkUrlsL[{0}]={1}", k, bwdLinkUrlsL[k]);
              }
            }
            throw new Exception();
          }
        }
      }
      Console.WriteLine("Iteration {0}: Put {1} PageLinks into store, adding {2} and deleting {3} links. Validation passed!", iter, batchSize, addCtr, delCtr);
    }
    Console.WriteLine("{0} of {1} non-mutating validation threads completed within their epoch, validated {2} of the graph on average", counters.numEpochPassed, counters.numChecks, counters.sumFractionChecked / counters.numChecks);
    Console.WriteLine("Done. RegressionTest3 took {0} seconds", 0.001 * sw.ElapsedMilliseconds);
  }
}
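// ---------------------------------------------------------------------------
// RegressionTest3 relies on several helpers that are not shown here
// (RandomString, RandomNormal, CloneList/CloneLists, SameSets, Idx, the PL
// record, ValidatorState, and the shared counters object). As one
// illustration, here is a plausible sketch of SameSets and of RandomNormal
// using the standard Box-Muller construction; signatures are inferred from
// the call sites above and the bodies are assumptions, so the actual
// implementations may differ.
// ---------------------------------------------------------------------------
using System;
using System.Collections.Generic;

internal static class TestHelpers {
  // Order-insensitive comparison of two URL arrays (duplicates ignored).
  internal static bool SameSets(string[] a, string[] b) {
    return new HashSet<string>(a).SetEquals(b);
  }

  // Normally distributed sample with the given mean and standard deviation
  // (Box-Muller transform), used above to bias link targets towards the
  // "neighborhood" of the source page.
  internal static double RandomNormal(Random rand, double mean, double stdDev) {
    double u1 = 1.0 - rand.NextDouble();   // avoid log(0)
    double u2 = rand.NextDouble();
    double z = Math.Sqrt(-2.0 * Math.Log(u1)) * Math.Sin(2.0 * Math.PI * u2);
    return mean + stdDev * z;
  }
}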
public static void Main(string[] args) {
  if (args.Length != 5) {
    Console.Error.WriteLine("Usage: SHS.Builder <leader> <file-type: -b|-t|-r|-p|-s|-pt> <linkfile> <friendly-name>");
    Environment.Exit(1);
  }
  Console.Write("Waiting...");  // Pause until the user presses Enter before starting the build
  Console.ReadLine();
  var sw = Stopwatch.StartNew();
  var service = new Service(args[0]);
  var numServers = service.NumServers();
  Console.WriteLine("SHS service is currently running on {0} servers", numServers);
  //var store = service.CreateStore(numServers - 1, 2, args[2]);
  var store = service.CreateStore(numServers - 1, 2, args[3], 5);
  Console.WriteLine("Created store...");
  if (args[1] == "-b") {          // Adjacency list in binary format
    var plBinEnum = new PageLinksBinaryFileEnumerator(args[2]);
    store.AddPageLinks(plBinEnum);
    store.Seal();
  } else if (args[1] == "-t") {   // Adjacency list in text format
    var plTextEnum = new PageLinksTextFileEnumerator(args[2]);
    store.AddPageLinks(plTextEnum);
    store.Seal();
  } else if (args[1] == "-r") {   // Adjacency list with timestamps
    var plTextEnum = new RevisionPageLinksTextFileEnumerator(args[2]);
    store.AddPageLinks(plTextEnum);
    store.Seal();
  } else if (args[1] == "-p") {   // Link pairs with outdegree, URL format
    Console.WriteLine("Starting...");
    var plTextEnum = new PageIndividualLinksTextFileEnumerator(args[2]);
    store.AddIndividualLinks(plTextEnum);
    store.Seal();
  } else if (args[1] == "-s") {   // Sampled link pairs in SURT form with outdegree
    Console.WriteLine("Starting...");
    var plTextEnum = new PageIndividualLinksSURTTextFileEnumerator(args[2]);
    store.AddIndividualLinks(plTextEnum);
    store.Seal();
  } else if (args[1] == "-pt") {  // Link pairs with timestamp and outdegree, URL format
    Console.WriteLine("Starting...");
    var plTextEnum = new PageIndividualTempLinksTextFileEnumerator(args[2]);
    var plURLEnum = new URLEnumerator(args[3]);
    store.AddIndividualTempLinks(plTextEnum, plURLEnum);
    store.Seal();
  }
  Console.Error.WriteLine("Done. Building store {0:N} took {1} seconds.", store.ID, 0.001 * sw.ElapsedMilliseconds);
}
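// ---------------------------------------------------------------------------
// Each flag above selects a different input enumerator. As an illustration of
// the pattern, here is a minimal sketch of a text adjacency-list reader that
// yields the PageLinks records (pageUrl plus linkUrls) used throughout these
// snippets. The one-page-per-line, whitespace-separated file layout is an
// assumption for this sketch; the real PageLinksTextFileEnumerator and the
// actual on-disk formats may differ.
// ---------------------------------------------------------------------------
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using SHS;  // assumed namespace of the PageLinks record

public static class SimplePageLinksReader {
  // One page per line: the first token is the page URL, remaining tokens are its link URLs.
  public static IEnumerator<PageLinks> Read(string fileName) {
    using (var reader = new StreamReader(fileName)) {
      string line;
      while ((line = reader.ReadLine()) != null) {
        var tokens = line.Split(new[] { ' ', '\t' }, StringSplitOptions.RemoveEmptyEntries);
        if (tokens.Length == 0) continue;  // skip blank lines
        yield return new PageLinks {
          pageUrl = tokens[0],
          linkUrls = tokens.Skip(1).ToArray()
        };
      }
    }
  }
}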
static void Main(string[] args) {
  if (args.Length != 3) {
    Console.Error.WriteLine("Usage: SHS.RegressionTest2 <leader> <numPages> <numIterations>");
  } else {
    int numPages = int.Parse(args[1]);
    int numIters = int.Parse(args[2]);
    var sw = System.Diagnostics.Stopwatch.StartNew();
    var svc = new Service(args[0]);
    Console.WriteLine("Service currently provides {0} servers", svc.NumServers());
    var store = svc.CreateStore();
    Console.WriteLine("Created new store with GUID {0:N}", store.ID);

    // Create a validation graph with "numPages" vertices and (for now) no edges
    var rand = new Random(123456);
    var pages = new string[numPages];
    var fwds = new List<int>[numPages];
    var bwds = new List<int>[numPages];
    for (int i = 0; i < pages.Length; i++) {
      var r = rand.Next();
      pages[i] = string.Format("http://www.{0:D2}.com/{1}", rand.Next(1, 100), RandomString(rand));
      fwds[i] = new List<int>();
      bwds[i] = new List<int>();
    }
    Array.Sort(pages);
    Console.Error.WriteLine("Synthesized {0} URLs ({1} duplicates)", pages.Length, pages.Length - pages.Distinct().Count());

    for (int iter = 0; iter < numIters; iter++) {
      var batchSize = rand.Next(10, 50);
      var plBatch = new List<PL>();
      long addCtr = 0;
      long delCtr = 0;
      while (plBatch.Count < batchSize) {
        // Pick a page
        var src = rand.Next(0, pages.Length);
        if (plBatch.Exists(x => x.src == src)) continue;
        var list = fwds[src];
        foreach (var dst in list) {
          bwds[dst].Remove(src);
        }
        if (list.Count == 0) {
          // If this page has no links, create between 20 and 50 links, with bias towards the "neighborhood"
          var numAdds = rand.Next(20, 51);
          while (numAdds > 0) {
            var dst = (int)RandomNormal(rand, src, 100);
            if (dst >= 0 && dst < pages.Length && !list.Contains(dst) && dst != src) {
              list.Add(dst);
              addCtr++;
              numAdds--;
            }
          }
        } else {
          // Otherwise, choose about half of the links to delete, and add about the same number of new links
          var dels = list.Where(x => rand.Next(0, 2) == 0).ToList();
          delCtr += dels.Count;
          var numAdds = rand.Next(dels.Count - 3, dels.Count + 4);
          while (numAdds > 0) {
            var dst = (int)RandomNormal(rand, src, 100);
            if (dst >= 0 && dst < pages.Length && !list.Contains(dst) && dst != src) {
              list.Add(dst);
              addCtr++;
              numAdds--;
            }
          }
          list = list.Except(dels).ToList();
        }
        foreach (var dst in list) {
          bwds[dst].Add(src);
        }
        fwds[src] = list;
        plBatch.Add(new PL { src = src, dsts = list });
      }
      var pageLinksBatch = plBatch.Select(x => new PageLinks { pageUrl = pages[x.src], linkUrls = x.dsts.Select(y => pages[y]).ToArray() }).ToList();
      store.AddPageLinks(pageLinksBatch.GetEnumerator());
      store.MarkAtomic();
      var srcUrls = plBatch.Select(x => pages[x.src]).ToArray();
      var srcUids = store.BatchedUrlToUid(srcUrls);
      var fwdLinkUids = store.BatchedGetLinks(srcUids, Dir.Fwd);
      for (int i = 0; i < fwdLinkUids.Length; i++) {
        var fwdLinkUrlsR = store.BatchedUidToUrl(fwdLinkUids[i]);
        var fwdLinkUrlsL = fwds[plBatch[i].src].Select(x => pages[x]).ToArray();
        AssertSameSets(fwdLinkUrlsR, fwdLinkUrlsL);
        var bwdLinkUids = store.BatchedGetLinks(fwdLinkUids[i], Dir.Bwd);
        for (int j = 0; j < bwdLinkUids.Length; j++) {
          var bwdLinkUrlsR = store.BatchedUidToUrl(bwdLinkUids[j]);
          var bwdLinkUrlsL = bwds[Idx(fwdLinkUrlsR[j], pages, plBatch[i].dsts)].Select(x => pages[x]).ToArray();
          AssertSameSets(bwdLinkUrlsR, bwdLinkUrlsL);
        }
      }
      Console.Error.WriteLine("Iteration {0}: Put {1} PageLinks into store, adding {2} and deleting {3} links. Validation passed!", iter, batchSize, addCtr, delCtr);
    }
    Console.WriteLine("Done. RegressionTest2 took {0} seconds", 0.001 * sw.ElapsedMilliseconds);
  }
}
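// ---------------------------------------------------------------------------
// RegressionTest2 uses two helpers not shown above: AssertSameSets, which
// fails the run if the remote and local link sets differ, and Idx, which maps
// a URL back to its index in the sorted pages array. A plausible sketch of
// both follows; the signatures are inferred from the call sites, and the
// bodies (in particular how Idx uses the dsts list) are assumptions.
// ---------------------------------------------------------------------------
using System;
using System.Collections.Generic;

internal static class RegressionHelpers {
  // Throws if the two URL arrays do not contain the same set of elements.
  internal static void AssertSameSets(string[] remote, string[] local) {
    if (!new HashSet<string>(remote).SetEquals(local)) {
      throw new Exception(string.Format("Link sets differ: {0} remote vs. {1} local URLs", remote.Length, local.Length));
    }
  }

  // Maps a URL back to its index in the sorted "pages" array (pages was sorted
  // with Array.Sort above); "dsts" is used here only as a sanity check that the
  // URL is indeed one of the expected link targets.
  internal static int Idx(string url, string[] pages, List<int> dsts) {
    int idx = Array.BinarySearch(pages, url);
    if (idx < 0 || !dsts.Contains(idx)) {
      throw new Exception("URL " + url + " is not one of the expected link targets");
    }
    return idx;
  }
}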