Example #1
0
        public static void Main(string[] args)
        {
            // Builder entry point: streams a page-links file into a newly created SHS store.
            // Expects exactly three arguments: the SHS leader, the link file, and a friendly store name.
            if (args.Length != 3)
            {
                Console.Error.WriteLine("Usage: SHS.Builder <leader> <linkfile.bin.gz> <friendly-name>");
                Environment.Exit(1);
            }

            var stopwatch = Stopwatch.StartNew();
            var service = new Service(args[0]);
            int serverCount = service.NumServers();
            Console.WriteLine("SHS service is currently running on {0} servers", serverCount);

            // NOTE(review): CreateStore arguments appear to be (serverCount-1, replicas, friendlyName) —
            // confirm against the Service API before changing.
            var store = service.CreateStore(serverCount - 1, 2, args[2]);
            var linkEnumerator = new PageLinksFileEnumerator(args[1]);

            // Load every page's links, then seal the store so it becomes queryable/immutable.
            store.AddPageLinks(linkEnumerator);
            store.Seal();
            Console.Error.WriteLine("Done. Building store {0:N} took {1} seconds.", store.ID, 0.001 * stopwatch.ElapsedMilliseconds);
        }
Example #2
0
        static void Main(string[] args)
        {
            // Regression test: maintains a local in-memory mirror (fwds/bwds) of a synthetic
            // link graph, pushes random mutation batches into an SHS store, and after each
            // batch verifies that the store's forward and backward links agree with the local
            // mirror.  It also spawns "numValidators" ValidatorState threads per iteration
            // that validate against a fixed epoch snapshot while the main thread keeps going.
            // Fix vs. original: corrected the misspelled "inconsistenty" diagnostic messages.
            if (args.Length != 4) {
            Console.Error.WriteLine("Usage: SHS.RegressionTest3 <servers.txt> <numPages> <numIterations> <numValThreads>");
              } else {
            int numPages = int.Parse(args[1]);
            int numIters = int.Parse(args[2]);
            int numValidators = int.Parse(args[3]);
            var sw = Stopwatch.StartNew();
            var svc = new Service(args[0]);
            Console.WriteLine("Service currently provides {0} servers", svc.NumServers());
            var store = svc.CreateStore();
            Console.WriteLine("Created new store with GUID {0:N}", store.ID);

            // Create a validation graph with "numPages" vertices and (for now) no edges
            // Fixed seed makes every run reproducible.
            var rand = new Random(123456);
            var pages = new string[numPages];
            var fwds = new List<int>[numPages];   // local mirror: forward links per page index
            var bwds = new List<int>[numPages];   // local mirror: backward links per page index
            for (int i = 0; i < pages.Length; i++) {
              // NOTE(review): 'r' is unused, but the call advances the RNG state; removing it
              // would change every subsequent random draw, so it is kept deliberately.
              var r = rand.Next();
              pages[i] = string.Format("http://www.{0:D2}.com/{1}", rand.Next(1, 100), RandomString(rand));
              fwds[i] = new List<int>();
              bwds[i] = new List<int>();
            }
            Array.Sort(pages);
            Console.WriteLine("Synthesized {0} URLs ({1} duplicates)", pages.Length, pages.Length - pages.Distinct().Count());

            for (int iter = 0; iter < numIters; iter++) {
              var batchSize = rand.Next(10, 50);
              var plBatch = new List<PL>();
              long addCtr = 0;
              long delCtr = 0;

              while (plBatch.Count < batchSize) {
            // Pick a page
            var src = rand.Next(0, pages.Length);
            // At most one entry per source page in a batch.
            if (plBatch.Exists(x => x.src == src)) continue;
            var list = fwds[src];
            // Temporarily retract src from all its targets' backward lists; they are
            // re-added below after the link list has been mutated.
            foreach (var dst in list) {
              bwds[dst].Remove(src);
            }
            if (list.Count == 0) {
              // If this page has no links, create between 20 and 50 links, with bias towards the "neighborhood"
              var numAdds = rand.Next(20, 51);
              while (numAdds > 0) {
                var dst = (int)RandomNormal(rand, src, 100);
                if (dst >= 0 && dst < pages.Length && !list.Contains(dst) && dst != src) {
                  list.Add(dst);
                  addCtr++;
                  numAdds--;
                }
              }
            } else {
              // Otherwise, choose about half of the links to delete, and add about the same number of new links
              var dels = list.Where(x => rand.Next(0, 2) == 0).ToList();
              delCtr += dels.Count;
              var numAdds = rand.Next(dels.Count - 3, dels.Count + 4);
              while (numAdds > 0) {
                var dst = (int)RandomNormal(rand, src, 100);
                if (dst >= 0 && dst < pages.Length && !list.Contains(dst) && dst != src) {
                  list.Add(dst);
                  addCtr++;
                  numAdds--;
                }
              }
              list = list.Except(dels).ToList();
            }
            foreach (var dst in list) {
              bwds[dst].Add(src);
            }
            fwds[src] = list;
            // Clone so the validator threads' snapshot cannot alias a list mutated later.
            plBatch.Add(new PL { src = src, dsts = CloneList(list) });
              }
              var pageLinksBatch = plBatch.Select(x => new PageLinks { pageUrl = pages[x.src], linkUrls = x.dsts.Select(y => pages[y]).ToArray() }).ToList();

              int epoch = store.AddPageLinks(pageLinksBatch.GetEnumerator());
              //store.MarkAtomic();

              // Snapshot the mirror at this epoch for the concurrent validators.
              var snapFwds = CloneLists(fwds);
              var snapBwds = CloneLists(bwds);
              for (int i = 0; i < numValidators; i++) {
            var vs = new ValidatorState(args[0], store.ID, epoch, plBatch, pages, snapFwds, snapBwds);
            new Thread(vs.DoWork).Start();
              }

              // Main-thread validation: round-trip every batch member through the store and
              // compare both link directions against the local mirror.
              var srcUrls = plBatch.Select(x => pages[x.src]).ToArray();
              var srcUids = store.BatchedUrlToUid(srcUrls, ref epoch);
              var fwdLinkUids = store.BatchedGetLinks(srcUids, Dir.Fwd, ref epoch);
              for (int i = 0; i < fwdLinkUids.Length; i++) {
            var fwdLinkUrlsR = store.BatchedUidToUrl(fwdLinkUids[i], ref epoch);
            var fwdLinkUrlsL = fwds[plBatch[i].src].Select(x => pages[x]).ToArray();
            if (!SameSets(fwdLinkUrlsR, fwdLinkUrlsL)) {
              // Lock the console so the multi-line dump is not interleaved with validator output.
              lock (Console.Out) {
                Console.WriteLine("Detected inconsistency! srcURL[{0}]={1}", i, srcUrls[i]);
                Console.WriteLine("{0} fwd link URLs according to SHS", fwdLinkUrlsR.Length);
                for (int k = 0; k < fwdLinkUrlsR.Length; k++) {
                  Console.WriteLine("  fwdLinkUrlsR[{0}]={1}", k, fwdLinkUrlsR[k]);
                }
                Console.WriteLine("{0} fwd link URLs according to local state", fwdLinkUrlsL.Length);
                for (int k = 0; k < fwdLinkUrlsL.Length; k++) {
                  Console.WriteLine("  fwdLinkUrlsL[{0}]={1}", k, fwdLinkUrlsL[k]);
                }
              }
              throw new Exception();
            }
            var bwdLinkUids = store.BatchedGetLinks(fwdLinkUids[i], Dir.Bwd, ref epoch);
            for (int j = 0; j < bwdLinkUids.Length; j++) {
              var bwdLinkUrlsR = store.BatchedUidToUrl(bwdLinkUids[j], ref epoch);
              var bwdLinkUrlsL = bwds[Idx(fwdLinkUrlsR[j], pages, plBatch[i].dsts)].Select(x => pages[x]).ToArray();
              if (!SameSets(bwdLinkUrlsR, bwdLinkUrlsL)) {
                lock (Console.Out) {
                  Console.WriteLine("Detected inconsistency!");
                  Console.WriteLine("  srcURL[{0}]={1}", i, srcUrls[i]);
                  Console.WriteLine("  dstURL[{0}]={1}", j, fwdLinkUrlsR[j]);
                  Console.WriteLine("{0} bwd link URLs according to SHS", bwdLinkUrlsR.Length);
                  for (int k = 0; k < bwdLinkUrlsR.Length; k++) {
                    Console.WriteLine("  bwdLinkUrlsR[{0}]={1}", k, bwdLinkUrlsR[k]);
                  }
                  Console.WriteLine("{0} bwd link URLs according to local state", bwdLinkUrlsL.Length);
                  for (int k = 0; k < bwdLinkUrlsL.Length; k++) {
                    Console.WriteLine("  bwdLinkUrlsL[{0}]={1}", k, bwdLinkUrlsL[k]);
                  }
                }
                throw new Exception();
              }
            }
              }
              Console.WriteLine("Iteration {0}: Put {1} PageLinks into store, Adding {2} and deleting {3} links. Validation passed!", iter, batchSize, addCtr, delCtr);
            }

            Console.WriteLine("{0} of {1} non-mutating validation threads were exempted, validated {2} of graph on average",
              counters.numEpochPassed, counters.numChecks, counters.sumFractionChecked / counters.numChecks);
            Console.WriteLine("Done. RegressionTest3 took {0} seconds", 0.001 * sw.ElapsedMilliseconds);
              }
        }
        public static void Main(string[] args)
        {
            // Builder entry point: loads a link graph into a freshly created SHS store,
            // selecting a format-specific enumerator based on the file-type flag in args[1].
            // NOTE(review): the check below requires exactly 5 arguments, but the usage string
            // lists only 4 placeholders and omits the -p/-s/-pt modes handled further down —
            // confirm the intended command line and reconcile.
            if (args.Length != 5)
            {
                Console.Error.WriteLine("Usage: SHS.Builder <leader> <file-type: -b|-t|-r> <linkfile.bin.gz> <friendly-name>");
                Environment.Exit(1);
            }

            // Pause until the operator presses Enter — presumably to allow attaching a
            // debugger/profiler or confirming the cluster is ready before the build starts.
            Console.Write("Waiting...");
            Console.ReadLine();
            var sw         = Stopwatch.StartNew();
            var service    = new Service(args[0]);
            var numServers = service.NumServers();

            Console.WriteLine("SHS service is currently running on {0} servers", numServers);
            //var store = service.CreateStore(numServers-1, 2, args[2]);
            // NOTE(review): args[3] is passed here as the store's friendly name, but the -pt
            // branch below also reads args[3] as a URL file — these uses conflict; verify.
            var store = service.CreateStore(numServers - 1, 2, args[3], 5);

            Console.WriteLine("Created store...");
            if (args[1] == "-b")  // Adjacent list in binary
            {
                var plBinEnum = new PageLinksBinaryFileEnumerator(args[2]);
                store.AddPageLinks(plBinEnum);
                store.Seal();
            }
            else if (args[1] == "-t")  // Adjacent list in text
            {
                var plTextEnum = new PageLinksTextFileEnumerator(args[2]);
                store.AddPageLinks(plTextEnum);
                store.Seal();
            }
            else if (args[1] == "-r")  // Adjacent list with timestamp
            {
                var plTextEnum = new RevisionPageLinksTextFileEnumerator(args[2]);
                store.AddPageLinks(plTextEnum);
                store.Seal();
            }
            else if (args[1] == "-p")  // Pair links with outdegree URL format
            {
                Console.WriteLine("Starting...");
                var plTextEnum = new PageIndividualLinksTextFileEnumerator(args[2]);
                store.AddIndividualLinks(plTextEnum);
                store.Seal();
            }
            else if (args[1] == "-s")  // Sample pair links in SURT form with outdegree
            {
                Console.WriteLine("Starting...");
                var plTextEnum = new PageIndividualLinksSURTTextFileEnumerator(args[2]);
                store.AddIndividualLinks(plTextEnum);
                store.Seal();
            }
            else if (args[1] == "-pt")  // Pair links with time and outdegree in URL format
            {
                Console.WriteLine("Starting...");
                var plTextEnum = new PageIndividualTempLinksTextFileEnumerator(args[2]);
                var plURLEnum  = new URLEnumerator(args[3]);
                store.AddIndividualTempLinks(plTextEnum, plURLEnum);
                store.Seal();
            }
            // NOTE(review): an unrecognized flag falls through silently — the store is created
            // but never filled or sealed; consider reporting an error for unknown file types.



            Console.Error.WriteLine("Done. Building store {0:N} took {1} seconds.", store.ID, 0.001 * sw.ElapsedMilliseconds);
        }
Example #4
0
        static void Main(string[] args)
        {
            // Regression test: maintains a local in-memory mirror (fwds/bwds) of a synthetic
            // link graph, pushes random mutation batches into an SHS store, and after each
            // batch verifies the store's forward and backward links against the local mirror.
            // Single-threaded variant (no concurrent validators).
            if (args.Length != 3) {
            Console.Error.WriteLine("Usage: SHS.RegressionTest2 <leader> <numPages> <numIterations>");
              } else {
            int numPages = int.Parse(args[1]);
            int numIters = int.Parse(args[2]);
            var sw = System.Diagnostics.Stopwatch.StartNew();
            var svc = new Service(args[0]);
            Console.WriteLine("Service currently provides {0} servers", svc.NumServers());
            var store = svc.CreateStore();
            Console.WriteLine("Created new store with GUID {0:N}", store.ID);

            // Create a validation graph with "numPages" vertices and (for now) no edges
            // Fixed seed makes every run reproducible.
            var rand = new Random(123456);
            var pages = new string[numPages];
            var fwds = new List<int>[numPages];   // local mirror: forward links per page index
            var bwds = new List<int>[numPages];   // local mirror: backward links per page index
            for (int i = 0; i < pages.Length; i++) {
              // NOTE(review): 'r' is unused, but the call advances the RNG state; removing it
              // would change every subsequent random draw.
              var r = rand.Next();
              pages[i] = string.Format("http://www.{0:D2}.com/{1}", rand.Next(1, 100), RandomString(rand));
              fwds[i] = new List<int>();
              bwds[i] = new List<int>();
            }
            Array.Sort(pages);
            Console.Error.WriteLine("Synthesized {0} URLs ({1} duplicates)", pages.Length, pages.Length - pages.Distinct().Count());

            for (int iter = 0; iter < numIters; iter++) {
              var batchSize = rand.Next(10, 50);
              var plBatch = new List<PL>();
              long addCtr = 0;
              long delCtr = 0;

              while (plBatch.Count < batchSize) {
            // Pick a page
            var src = rand.Next(0, pages.Length);
            // At most one entry per source page in a batch.
            if (plBatch.Exists(x => x.src == src)) continue;
            var list = fwds[src];
            // Temporarily retract src from all its targets' backward lists; they are
            // re-added below after the link list has been mutated.
            foreach (var dst in list) {
              bwds[dst].Remove(src);
            }
            if (list.Count == 0) {
              // If this page has no links, create between 20 and 50 links, with bias towards the "neighborhood"
              var numAdds = rand.Next(20, 51);
              while (numAdds > 0) {
                var dst = (int)RandomNormal(rand, src, 100);
                if (dst >= 0 && dst < pages.Length && !list.Contains(dst) && dst != src) {
                  list.Add(dst);
                  addCtr++;
                  numAdds--;
                }
              }
            } else {
              // Otherwise, choose about half of the links to delete, and add about the same number of new links
              var dels = list.Where(x => rand.Next(0, 2) == 0).ToList();
              delCtr += dels.Count;
              var numAdds = rand.Next(dels.Count - 3, dels.Count + 4);
              while (numAdds > 0) {
                var dst = (int)RandomNormal(rand, src, 100);
                if (dst >= 0 && dst < pages.Length && !list.Contains(dst) && dst != src) {
                  list.Add(dst);
                  addCtr++;
                  numAdds--;
                }
              }
              list = list.Except(dels).ToList();
            }
            foreach (var dst in list) {
              bwds[dst].Add(src);
            }
            fwds[src] = list;
            // NOTE(review): 'dsts' aliases fwds[src] here (no clone); this looks safe because a
            // duplicate src is skipped within a batch and plBatch is discarded each iteration,
            // but confirm if this batch ever outlives the iteration.
            plBatch.Add(new PL { src = src, dsts = list });
              }
              var pageLinksBatch = plBatch.Select(x => new PageLinks{pageUrl = pages[x.src], linkUrls = x.dsts.Select(y => pages[y]).ToArray()}).ToList();

              store.AddPageLinks(pageLinksBatch.GetEnumerator());
              store.MarkAtomic();

              // Validation: round-trip every batch member through the store and compare both
              // link directions against the local mirror; AssertSameSets throws on mismatch.
              var srcUrls = plBatch.Select(x => pages[x.src]).ToArray();
              var srcUids = store.BatchedUrlToUid(srcUrls);
              var fwdLinkUids = store.BatchedGetLinks(srcUids, Dir.Fwd);
              for (int i = 0; i < fwdLinkUids.Length; i++) {
            var fwdLinkUrlsR = store.BatchedUidToUrl(fwdLinkUids[i]);
            var fwdLinkUrlsL = fwds[plBatch[i].src].Select(x => pages[x]).ToArray();
            AssertSameSets(fwdLinkUrlsR, fwdLinkUrlsL);
            var bwdLinkUids = store.BatchedGetLinks(fwdLinkUids[i], Dir.Bwd);
            for (int j = 0; j < bwdLinkUids.Length; j++) {
              var bwdLinkUrlsR = store.BatchedUidToUrl(bwdLinkUids[j]);
              var bwdLinkUrlsL = bwds[Idx(fwdLinkUrlsR[j], pages, plBatch[i].dsts)].Select(x => pages[x]).ToArray();
              AssertSameSets(bwdLinkUrlsR, bwdLinkUrlsL);
            }
              }
              Console.Error.WriteLine("Iteration {0}: Put {1} PageLinks into store, Adding {2} and deleting {3} links. Validation passed!", iter, batchSize, addCtr, delCtr);
            }

            Console.WriteLine("Done. RegressionTest2 took {0} seconds", 0.001 * sw.ElapsedMilliseconds);
              }
        }