/// <summary>
/// Fetches the value stored in <paramref name="roots"/> for every entry of
/// <paramref name="uids"/> and, for entries whose stored value differs from the UID
/// itself, recursively resolves that value until it maps to itself. Entries that were
/// shortened this way are written back to <paramref name="roots"/>.
/// </summary>
/// <param name="roots">Per-UID state that is read with GetMany and updated with SetMany.</param>
/// <param name="uids">The UIDs to resolve.</param>
/// <returns>An array parallel to <paramref name="uids"/> holding the resolved value of each UID.</returns>
private static long[] GetRoots(UidState<long> roots, long[] uids)
{
    var current = roots.GetMany(uids);

    // Gather every stored value that is not a fixed point; those need resolving themselves.
    var pending = new UidMap();
    bool anyIndirect = false;
    for (int k = 0; k < uids.Length; k++)
    {
        if (current[k] == uids[k])
        {
            continue;
        }
        pending.Add(current[k]);
        anyIndirect = true;
    }

    // Every UID already maps to itself: nothing to chase, nothing to write back.
    if (!anyIndirect)
    {
        return(current);
    }

    // Resolve the intermediate values first (the recursive call also persists their shortcuts).
    var resolved = GetRoots(roots, pending);

    bool dirty = false;
    for (int k = 0; k < uids.Length; k++)
    {
        if (current[k] == uids[k])
        {
            continue;
        }
        var closure = resolved[pending[current[k]]];
        if (closure != current[k])
        {
            current[k] = closure;
            dirty = true;
        }
    }

    // Only touch the shared state when at least one entry actually moved.
    if (dirty)
    {
        roots.SetMany(uids, current);
    }
    return(current);
}
/// <summary>
/// Reads a single query from a text file (one line with the query id, one line with the
/// number of URLs, then that many URL lines), expands the URLs with sampled backward and
/// forward links, scores the resulting node set with computeHITS and prints, for every
/// scored node, the two score components and the node's URL separated by tabs.
/// </summary>
/// <param name="args">
/// args[0] is handed to the Service constructor, args[1] is the store GUID (also passed to
/// RevisionData), args[2] is the path of the query file, args[3] and args[4] are the two
/// dates forwarded to getOutlinkInDuration.
/// </param>
public static void Main(string[] args)
{
    var shs = new Service(args[0]).OpenStore(Guid.Parse(args[1]));
    RevisionData info = new RevisionData(args[1]);
    using (var rd = new StreamReader(new BufferedStream(new FileStream(args[2], FileMode.Open, FileAccess.Read))))
    {
        DateTime d1 = Convert.ToDateTime(args[3]);
        DateTime d2 = Convert.ToDateTime(args[4]);
        try
        {
            // StreamReader.ReadLine signals end of input by returning null (it never throws
            // EndOfStreamException), so a missing line is checked explicitly and ends the run quietly.
            string queryIdLine = rd.ReadLine();
            if (queryIdLine == null)
            {
                return;
            }
            // The query id is not used further, but it is still parsed so that a malformed
            // header is rejected exactly as before.
            Int32.Parse(queryIdLine);

            string numUrlsLine = rd.ReadLine();
            if (numUrlsLine == null)
            {
                return;
            }
            int numUrls = Int32.Parse(numUrlsLine);

            var urls = new string[numUrls];
            for (int i = 0; i < numUrls; i++)
            {
                urls[i] = rd.ReadLine();
                if (urls[i] == null)
                {
                    // Fewer URL lines than announced: treat as truncated input.
                    return;
                }
            }

            var sw = Stopwatch.StartNew();
            var uids = shs.BatchedUrlToUid(urls);
            var tbl = new UidMap(uids, true);
            long[] bwdUids = tbl;

            // bs and fs are not declared in this method; they have to come from the enclosing type.
            var bwdLinks = shs.BatchedSampleLinks(bwdUids, Dir.Bwd, bs, true);
            for (int i = 0; i < bwdUids.Length; i++)
            {
                var bwdValidateUids = shs.BatchedSampleLinks(bwdLinks[i], Dir.Fwd, fs, true);
                for (int j = 0; j < bwdValidateUids.Length; j++)
                {
                    string[] validateUrls = shs.BatchedUidToUrl(bwdValidateUids[j]);
                    // NOTE(review): the returned dictionary is not consumed anywhere yet; the call is
                    // kept because getOutlinkInDuration may do work of its own - confirm.
                    info.getOutlinkInDuration(bwdLinks[i][j], bwdValidateUids[j], validateUrls, d1, d2);
                }
            }

            var fwdUids = shs.BatchedSampleLinks(tbl, Dir.Fwd, fs, true);
            foreach (long[] x in bwdLinks)
            {
                tbl.Add(x);
            }
            foreach (long[] x in fwdUids)
            {
                tbl.Add(x);
            }
            long[] srcUids = tbl;

            var dstUids = shs.BatchedGetLinks(srcUids, Dir.Fwd);
            SortedDictionary<long, KeyValuePair<double, double>> returnScore = computeHITS(tbl, srcUids, dstUids);

            // Stopwatch.ElapsedTicks counts in units of 1/Stopwatch.Frequency seconds;
            // Elapsed.Ticks is always in 100 ns units, so dividing by 10 really yields microseconds.
            long elapsedMicroseconds = sw.Elapsed.Ticks / 10;
            Console.WriteLine("HITS finish in {0} microseconds with {1} links", elapsedMicroseconds, tbl.GetSize());

            // Output the result scores to screen.
            var resultUrls = shs.BatchedUidToUrl(srcUids);
            for (int i = 0; i < srcUids.Length; i++)
            {
                KeyValuePair<double, double> score;
                if (returnScore.TryGetValue(srcUids[i], out score))
                {
                    Console.WriteLine("{0}\t{1}\t{2}", score.Key, score.Value, resultUrls[i]);
                }
            }
        }
        catch (EndOfStreamException)
        {
            // NOTE(review): the reader above no longer depends on this; it is kept in case one of
            // the store calls reports a truncated stream this way - confirm and narrow if not.
        }
    }
}
/// <summary>
/// Entry point of the weakly-connected-components job. Expects exactly two arguments
/// (printed in the usage text as leader and store). It merges the roots of every UID with
/// the roots of its forward links batch by batch, flattens the root table with GetRoots,
/// sorts all (uid, root) pairs on disk, writes the UIDs to "wcc-main.bin" and one
/// (size, start position) pair per run of equal roots to "wcc-index.bin", then re-reads
/// the index to report the number of components and the size of the largest one.
/// </summary>
/// <param name="args">Command-line arguments; anything other than two entries prints the usage line.</param>
public static void Main(string[] args)
{
    if (args.Length != 2)
    {
        Console.Error.WriteLine("Usage: SHS.WCC <leader> <store>");
        return;
    }

    var sw = Stopwatch.StartNew();
    var store = new Service(args[0]).OpenStore(Guid.Parse(args[1]));
    var roots = store.AllocateUidState<long>();
    roots.SetAll(x => x);   // every UID starts out as its own root

    // Pass 1: for each batch of UIDs, union each UID with the targets of its forward links.
    var batch = new Batch<long>(10000);
    foreach (long u in store.Uids())
    {
        batch.Add(u);
        if (!batch.Full && !store.IsLastUid(u))
        {
            continue;
        }

        long[] uids = batch;
        long[][] fwds = store.BatchedGetLinks(uids, Dir.Fwd);

        // Replace every UID and link target by its currently known root.
        var map = new UidMap(fwds);
        map.Add(uids);
        var xRoots = GetRoots(roots, map);
        for (int row = 0; row < fwds.Length; row++)
        {
            uids[row] = xRoots[map[uids[row]]];
            for (int col = 0; col < fwds[row].Length; col++)
            {
                fwds[row][col] = xRoots[map[fwds[row][col]]];
            }
        }

        // Work on the (smaller) set of roots only from here on.
        map = new UidMap(fwds);
        map.Add(uids);
        long[] reprs = roots.GetMany(map);
        for (int row = 0; row < fwds.Length; row++)
        {
            // Follow the chain from the source until it reaches an entry that maps to itself.
            long srcNode = uids[row];
            long srcSlot = map[srcNode];
            while (srcNode != reprs[srcSlot])
            {
                srcNode = reprs[srcSlot];
                srcSlot = map[srcNode];
            }

            for (int col = 0; col < fwds[row].Length; col++)
            {
                long dstNode = fwds[row][col];
                long dstSlot = map[dstNode];
                while (dstNode != reprs[dstSlot])
                {
                    dstNode = reprs[dstSlot];
                    dstSlot = map[dstNode];
                }

                // The smaller value wins; when the target side wins, continue from its slot.
                if (reprs[srcSlot] >= reprs[dstSlot])
                {
                    reprs[srcSlot] = reprs[dstSlot];
                    srcSlot = dstSlot;
                }
                else
                {
                    reprs[dstSlot] = reprs[srcSlot];
                }
            }
        }
        roots.SetMany(map, reprs);
        batch.Reset();
    }

    // Pass 2: run GetRoots over all UIDs in larger batches so the stored chains get resolved.
    batch = new Batch<long>(400000);
    foreach (long u in store.Uids())
    {
        batch.Add(u);
        if (!batch.Full && !store.IsLastUid(u))
        {
            continue;
        }
        GetRoots(roots, batch);
        batch.Reset();
    }

    // Pass 3: sort the (uid, root) pairs and emit the UID file plus the run index.
    using (var sorter = new DiskSorter<UidVal<long>>(new Comparer(), Write, Read, 100000000))
    {
        foreach (var uv in roots.GetAll())
        {
            sorter.Add(uv);
        }
        sorter.Sort();

        using (var wccWr = new BinaryWriter(new BufferedStream(new FileStream("wcc-main.bin", FileMode.Create, FileAccess.Write))))
        using (var idxWr = new BinaryWriter(new BufferedStream(new FileStream("wcc-index.bin", FileMode.Create, FileAccess.Write))))
        {
            long runStart = 0;
            long runRoot = -1;
            for (long pos = 0; pos < sorter.Total; pos++)
            {
                var uv = sorter.Get();
                wccWr.Write(uv.uid);

                // A change of root (other than at the very first item) closes the previous run.
                if (pos != 0 && uv.val != runRoot)
                {
                    idxWr.Write(pos - runStart);
                    idxWr.Write(runStart);
                    runStart = pos;
                }
                runRoot = uv.val;
            }
            Debug.Assert(sorter.AtEnd());

            // Close the final run, if there was any data at all.
            if (sorter.Total > 0)
            {
                idxWr.Write(sorter.Total - runStart);
                idxWr.Write(runStart);
            }
        }
    }

    // Pass 4: re-read the index and build a histogram: component size -> number of components.
    var dict = new System.Collections.Generic.Dictionary<long, long>();
    using (var rd = new BinaryReader(new BufferedStream(new FileStream("wcc-index.bin", FileMode.Open, FileAccess.Read))))
    {
        bool more = true;
        while (more)
        {
            try
            {
                long size = rd.ReadInt64();
                rd.ReadInt64();   // start position of the run; read only to advance the stream

                long seen;
                dict.TryGetValue(size, out seen);   // leaves seen at 0 for a size not met before
                dict[size] = seen + 1;
            }
            catch (EndOfStreamException)
            {
                more = false;
            }
        }
    }

    long maxSize = 0;
    long numWCCs = 0;
    foreach (var kv in dict)
    {
        maxSize = Math.Max(maxSize, kv.Key);
        numWCCs += kv.Value;
    }
    Console.WriteLine("Done. {0} weakly connected components, largest has {1} nodes. Job took {2} seconds.", numWCCs, maxSize, 0.001 * sw.ElapsedMilliseconds);
}
/// <summary>
/// Processes queries from a binary file until the reader reports the end of the stream.
/// Each query is an Int32 id, an Int32 URL count and that many length-prefixed strings.
/// For every query the URLs are mapped to UIDs, the set is extended with sampled backward
/// and forward links, a link graph restricted to that set is built, and an authority score
/// is propagated back and forth over the graph for 100 rounds (SALSA-style). The query id
/// and the query URL with the highest score are written to standard error.
/// </summary>
/// <param name="args">
/// args[0] is handed to the Service constructor, args[1] is the store GUID, args[2] is the
/// query file, args[3] and args[4] are the sample counts for backward and forward links.
/// </param>
public static void Main(string[] args)
{
    var shs = new Service(args[0]).OpenStore(Guid.Parse(args[1]));
    using (var rd = new BinaryReader(new BufferedStream(new FileStream(args[2], FileMode.Open, FileAccess.Read))))
    {
        int bs = int.Parse(args[3]);
        int fs = int.Parse(args[4]);
        while (true)
        {
            try
            {
                int queryId = rd.ReadInt32();
                int numUrls = rd.ReadInt32();
                var urls = new string[numUrls];
                for (int k = 0; k < numUrls; k++)
                {
                    urls[k] = rd.ReadString();
                }

                // Root set plus sampled in- and out-neighbours.
                var uids = shs.BatchedUrlToUid(urls);
                var tbl = new UidMap(uids);
                var bwdUids = shs.BatchedSampleLinks(tbl, Dir.Bwd, bs, true);
                var fwdUids = shs.BatchedSampleLinks(tbl, Dir.Fwd, fs, true);
                foreach (long[] sampled in bwdUids)
                {
                    tbl.Add(sampled);
                }
                foreach (long[] sampled in fwdUids)
                {
                    tbl.Add(sampled);
                }
                long[] srcUids = tbl;
                var dstUids = shs.BatchedGetLinks(srcUids, Dir.Fwd);
                int n = dstUids.Length;

                // outNbrs[v]: nodes v links to; inNbrs[v]: nodes linking to v (both as table indices).
                var outNbrs = new List<int>[n];
                var inNbrs = new List<int>[n];
                for (int v = 0; v < n; v++)
                {
                    outNbrs[v] = new List<int>();
                    inNbrs[v] = new List<int>();
                }
                for (int v = 0; v < n; v++)
                {
                    int from = tbl[srcUids[v]];
                    foreach (long target in dstUids[v])
                    {
                        int to = tbl[target];
                        if (to == -1)
                        {
                            continue;   // link leaves the node set
                        }
                        outNbrs[from].Add(to);
                        inNbrs[to].Add(from);
                    }
                }

                // Uniform start distribution over the nodes that have at least one in-link.
                int numAuts = 0;
                foreach (List<int> sources in inNbrs)
                {
                    if (sources.Count > 0)
                    {
                        numAuts++;
                    }
                }
                double initAut = 1.0 / numAuts;
                var aut = new double[n];
                var tmp = new double[n];
                for (int v = 0; v < n; v++)
                {
                    if (inNbrs[v].Count > 0)
                    {
                        aut[v] = initAut;
                    }
                    else
                    {
                        aut[v] = 0.0;
                    }
                }

                for (int round = 0; round < 100; round++)
                {
                    // Authority mass flows backwards over in-links into tmp ...
                    for (int v = 0; v < n; v++)
                    {
                        double share = aut[v] / inNbrs[v].Count;
                        foreach (int src in inNbrs[v])
                        {
                            tmp[src] += share;
                        }
                        aut[v] = 0.0;
                    }
                    // ... and forwards again over out-links into aut.
                    for (int v = 0; v < n; v++)
                    {
                        double share = tmp[v] / outNbrs[v].Count;
                        foreach (int dst in outNbrs[v])
                        {
                            aut[dst] += share;
                        }
                        tmp[v] = 0.0;
                    }
                }

                // Pick the first query URL with the strictly highest score; unknown URLs score 0.
                double bestScore = double.MinValue;
                string bestUrl = null;
                for (int k = 0; k < urls.Length; k++)
                {
                    double score;
                    if (uids[k] == -1)
                    {
                        score = 0.0;
                    }
                    else
                    {
                        score = aut[tbl[uids[k]]];
                    }
                    if (score > bestScore)
                    {
                        bestScore = score;
                        bestUrl = urls[k];
                    }
                }
                System.Console.Error.WriteLine("{0} {1}", queryId, bestUrl);
            }
            catch (EndOfStreamException)
            {
                break;
            }
        }
    }
}
/// <summary>
/// Processes queries from a text file until the input is exhausted. Each query consists of
/// a line with the query id, a line with the number of URLs and that many URL lines. For
/// every query the URLs are mapped to UIDs, the set is extended with sampled backward and
/// forward links, a link graph restricted to that set is built, and an authority score is
/// propagated back and forth over the graph for a fixed number of rounds. The score of
/// every query URL is printed to standard output and the best-scoring URL, prefixed with
/// the query id, to standard error.
/// </summary>
/// <param name="args">
/// args[0] is handed to the Service constructor, args[1] is the store GUID, args[2] is the
/// query file, args[3] and args[4] are the sample counts for backward and forward links.
/// </param>
public static void Main(string[] args)
{
    const int IterationCount = 10;

    var shs = new Service(args[0]).OpenStore(Guid.Parse(args[1]));
    using (var rd = new StreamReader(new BufferedStream(new FileStream(args[2], FileMode.Open, FileAccess.Read))))
    {
        int bs = int.Parse(args[3]);
        int fs = int.Parse(args[4]);
        while (true)
        {
            try
            {
                // StreamReader.ReadLine returns null at end of input instead of throwing, so the
                // loop has to stop on null; otherwise Int32.Parse(null) would end every run with
                // an ArgumentNullException.
                string queryIdLine = rd.ReadLine();
                if (queryIdLine == null)
                {
                    break;
                }
                int queryId = Int32.Parse(queryIdLine);

                string numUrlsLine = rd.ReadLine();
                if (numUrlsLine == null)
                {
                    break;
                }
                int numUrls = Int32.Parse(numUrlsLine);

                var urls = new string[numUrls];
                bool truncated = false;
                for (int i = 0; i < numUrls; i++)
                {
                    string line = rd.ReadLine();
                    if (line == null)
                    {
                        truncated = true;
                        break;
                    }
                    urls[i] = line;
                }
                if (truncated)
                {
                    // Fewer URL lines than announced: stop without scoring the partial query.
                    break;
                }

                // Root set plus sampled in- and out-neighbours.
                var uids = shs.BatchedUrlToUid(urls);
                var tbl = new UidMap(uids);
                var bwdUids = shs.BatchedSampleLinks(tbl, Dir.Bwd, bs, true);
                var fwdUids = shs.BatchedSampleLinks(tbl, Dir.Fwd, fs, true);
                foreach (long[] x in bwdUids)
                {
                    tbl.Add(x);
                }
                foreach (long[] x in fwdUids)
                {
                    tbl.Add(x);
                }
                long[] srcUids = tbl;
                var dstUids = shs.BatchedGetLinks(srcUids, Dir.Fwd);
                int n = dstUids.Length;

                // srcId[v]: nodes v links to; dstId[v]: nodes linking to v (both as table indices).
                var srcId = new List<int>[n];
                var dstId = new List<int>[n];
                for (int i = 0; i < n; i++)
                {
                    srcId[i] = new List<int>();
                    dstId[i] = new List<int>();
                }

                var sw = Stopwatch.StartNew();
                for (int i = 0; i < n; i++)
                {
                    int sid = tbl[srcUids[i]];
                    for (int j = 0; j < dstUids[i].Length; j++)
                    {
                        int did = tbl[dstUids[i][j]];
                        if (did != -1)
                        {
                            srcId[sid].Add(did);
                            dstId[did].Add(sid);
                        }
                    }
                }

                // Stopwatch.ElapsedTicks counts in units of 1/Stopwatch.Frequency seconds;
                // Elapsed.Ticks is always in 100 ns units, so dividing by 10 really yields microseconds.
                // NOTE(review): as placed, this only times the adjacency-list construction above.
                long elapsedMicroseconds = sw.Elapsed.Ticks / 10;
                Console.WriteLine("SALSA finish in {0} microseconds", elapsedMicroseconds);

                // Uniform start distribution over the nodes that have at least one in-link.
                int numAuts = 0;
                for (int i = 0; i < n; i++)
                {
                    if (dstId[i].Count > 0)
                    {
                        numAuts++;
                    }
                }
                double initAut = 1.0 / numAuts;
                var aut = new double[n];
                var tmp = new double[n];
                for (int i = 0; i < n; i++)
                {
                    aut[i] = dstId[i].Count > 0 ? initAut : 0.0;
                }

                for (int k = 0; k < IterationCount; k++)
                {
                    // Authority mass flows backwards over in-links into tmp ...
                    for (int u = 0; u < n; u++)
                    {
                        foreach (var id in dstId[u])
                        {
                            tmp[id] += (aut[u] / dstId[u].Count);
                        }
                        aut[u] = 0.0;
                    }
                    // ... and forwards again over out-links into aut.
                    for (int u = 0; u < n; u++)
                    {
                        foreach (var id in srcId[u])
                        {
                            aut[id] += (tmp[u] / srcId[u].Count);
                        }
                        tmp[u] = 0.0;
                    }
                }

                // URLs the store does not know (UID -1) score 0.
                var scores = new double[urls.Length];
                for (int i = 0; i < scores.Length; i++)
                {
                    scores[i] = uids[i] == -1 ? 0.0 : aut[tbl[uids[i]]];
                }
                for (int i = 0; i < scores.Length; i++)
                {
                    Console.WriteLine("{0}: {1}", urls[i], scores[i]);
                }

                double bestScore = double.MinValue;
                string bestUrl = null;
                for (int i = 0; i < urls.Length; i++)
                {
                    if (scores[i] > bestScore)
                    {
                        bestScore = scores[i];
                        bestUrl = urls[i];
                    }
                }
                System.Console.Error.WriteLine("{0} {1}", queryId, bestUrl);
            }
            catch (EndOfStreamException)
            {
                // NOTE(review): the reader above no longer depends on this; it is kept in case one
                // of the store calls reports a truncated stream this way - confirm and narrow if not.
                break;
            }
        }
    }
}