/// <summary>
/// Loads revision records from a gzip-compressed binary file and builds the
/// uid lookup structures (<c>nodeMap</c> and <c>pointer</c>).
/// </summary>
/// <remarks>
/// Layout as consumed here: record count (Int64), link count (Int64), then for
/// every record a uid (Int64), the out-link vector size (Int32), the number of
/// revisions (Int32), one Int64 per revision, and finally the revision matrix
/// bytes. If reading fails, the error is written to stderr and loading stops;
/// the lookup structures are then built from whatever was read so far.
/// </remarks>
/// <param name="revision_file">Path of the gzip-compressed revision file.</param>
public RevisionData(string revision_file)
{
    long[] uuid = null;
    using (var reader = new BinaryReader(
        new GZipStream(
            new BufferedStream(new FileStream(revision_file, FileMode.Open, FileAccess.Read)),
            CompressionMode.Decompress)))
    {
        // Declared outside the try so the catch block can report the failing record.
        int record = 0;
        try
        {
            NUM_RECORDS = reader.ReadInt64();
            NUM_LINKS = reader.ReadInt64();
            fwd_revision_data = new node_revision_info[NUM_RECORDS];
            uuid = new long[NUM_RECORDS];

            for (record = 0; record < NUM_RECORDS; record++)
            {
                uuid[record] = (long)reader.ReadInt64();
                fwd_revision_data[record].outlink_vector_size = reader.ReadInt32();
                fwd_revision_data[record].number_of_revision = reader.ReadInt32();

                // One cell per (revision, out-link) pair, rounded up to a multiple of 8.
                // NOTE(review): the rounded cell count is used as a BYTE count below,
                // i.e. one byte is read per cell - confirm against the file writer.
                var cells = fwd_revision_data[record].outlink_vector_size * fwd_revision_data[record].number_of_revision;
                var remainder = cells % 8;
                if (remainder != 0)
                {
                    cells += (8 - remainder);
                }

                var matrix = new byte[cells];
                fwd_revision_data[record].revision_matrix = matrix;
                var times = new long[fwd_revision_data[record].number_of_revision];
                fwd_revision_data[record].time_duration = times;

                times[0] = reader.ReadInt64();
                for (int rev = 1; rev < fwd_revision_data[record].number_of_revision; rev++)
                {
                    // NOTE(review): this adds to a freshly zeroed slot, so each entry ends
                    // up holding the raw value read. If a running total was intended
                    // (previous entry + delta) this needs times[rev - 1] - TODO confirm.
                    times[rev] += reader.ReadInt64();
                }

                for (int b = 0; b < matrix.Length; b++)
                {
                    matrix[b] = reader.ReadByte();
                }
            }
        }
        catch (Exception e)
        {
            Console.Error.WriteLine("Reading error at line: {0}", record);
            Console.Error.WriteLine(e.Message);
        }
    }

    // Map every uid to the index of its record.
    nodeMap = new UidMap(uuid);
    for (int idx = 0; idx < NUM_RECORDS; idx++)
    {
        pointer[nodeMap[uuid[idx]]] = idx;
    }
}
/// <summary>
/// Returns a new map containing every value of this map for which the indexer
/// of <paramref name="x"/> yields -1 (presumably "not present in x").
/// </summary>
/// <param name="x">The map whose members are to be left out of the result.</param>
/// <returns>A freshly created map; neither this map nor <paramref name="x"/> is modified here.</returns>
public UidMap Subtract(UidMap x)
{
    var remaining = new UidMap();
    foreach (long candidate in vals)
    {
        bool foundInOther = x[candidate] != -1;
        if (foundInOther)
        {
            continue;
        }
        remaining.Add(candidate);
    }
    return remaining;
}
/// <summary>
/// Runs a fixed number of PageRank iterations over an SHS store. The scores of
/// the previous iteration live in a scratch file "pr-scores-K.bin"; the scores
/// being accumulated live in a UidState on the store.
/// </summary>
/// <remarks>
/// Every uid starts at 1/n. In each iteration every uid's new score is reset to
/// d/n, and each page then adds (1 - d) * oldScore / outDegree to every page it
/// links to. The scratch file of the finished iteration is deleted, so only
/// "pr-scores-&lt;iters&gt;.bin" remains at the end.
/// The code reads one double per uid while enumerating <c>store.Uids()</c>, so it
/// presumably relies on that enumeration matching the order of <c>GetAll()</c>.
/// </remarks>
/// <param name="args">leader, store id (GUID), d, number of iterations.</param>
public static void Main(string[] args)
{
    if (args.Length != 4)
    {
        Console.Error.WriteLine("Usage: SHS.PageRank <leader> <store> <d> <iters>");
        return;
    }

    var timer = Stopwatch.StartNew();
    var store = new Service(args[0]).OpenStore(Guid.Parse(args[1]));
    double d = double.Parse(args[2]);
    int numIters = int.Parse(args[3]);
    long n = store.NumUrls();
    Func<int, string> scoreFile = iteration => "pr-scores-" + iteration + ".bin";

    // Input of iteration 0: a uniform score of 1/n for each of the n URLs.
    using (var seedWriter = new BinaryWriter(new BufferedStream(new FileStream(scoreFile(0), FileMode.Create, FileAccess.Write))))
    {
        long written = 0;
        while (written < n)
        {
            seedWriter.Write(1.0 / n);
            written++;
        }
    }

    var scores = store.AllocateUidState<double>();
    var uidBatch = new Batch<long>(50000);
    for (int iter = 0; iter < numIters; iter++)
    {
        scores.SetAll(uid => d / n);

        using (var previous = new BinaryReader(new BufferedStream(new FileStream(scoreFile(iter), FileMode.Open, FileAccess.Read))))
        {
            foreach (long u in store.Uids())
            {
                uidBatch.Add(u);
                bool flush = uidBatch.Full || store.IsLastUid(u);
                if (!flush)
                {
                    continue;
                }

                // Fetch the forward links of the whole batch, then update the
                // scores of all distinct link targets in one round trip.
                var linkBatch = store.BatchedGetLinks(uidBatch, Dir.Fwd);
                var targets = new UidMap(linkBatch);
                var targetScores = scores.GetMany(targets);
                foreach (var links in linkBatch)
                {
                    double share = (1.0 - d) * previous.ReadDouble() / links.Length;
                    foreach (var link in links)
                    {
                        targetScores[targets[link]] += share;
                    }
                }
                scores.SetMany(targets, targetScores);
                uidBatch.Reset();
            }
        }

        // Persist this iteration's result as the next iteration's input.
        using (var next = new BinaryWriter(new BufferedStream(new FileStream(scoreFile(iter + 1), FileMode.Create, FileAccess.Write))))
        {
            foreach (var entry in scores.GetAll())
            {
                next.Write(entry.val);
            }
        }
        File.Delete(scoreFile(iter));
        Console.WriteLine("Iteration {0} complete", iter);
    }

    Console.WriteLine("Done. {0} iterations took {1} seconds.", numIters, 0.001 * timer.ElapsedMilliseconds);
}
/// <summary>
/// Fault-tolerant PageRank over an SHS store: both the previous and the new
/// score vectors are kept in UidState objects, and every unit of work is
/// followed by a store checkpoint and retried when a ServerFailure occurs.
/// </summary>
/// <remarks>
/// Every uid starts at 1/n. In each iteration every uid's new score is reset to
/// d/n, and each page then adds (1 - d) * oldScore / outDegree to every page it
/// links to. The final scores are written to "pr-scores.bin".
/// </remarks>
/// <param name="args">leader, store id (GUID), d, number of iterations.</param>
public static void Main(string[] args)
{
    if (args.Length != 4)
    {
        Console.Error.WriteLine("Usage: SHS.PageRankFT <leader> <store> <d> <iters>");
        return;
    }

    var sw = Stopwatch.StartNew();
    var store = new Service(args[0]).OpenStore(Guid.Parse(args[1]));

    // Runs the block and then checkpoints the store; on ServerFailure the whole
    // block is executed again, so blocks must be safe to re-run from the start.
    Action<Action> Checkpointed = delegate(Action checkpointedBlock)
    {
        while (true)
        {
            try
            {
                checkpointedBlock();
                store.Checkpoint();
                break;
            }
            catch (ServerFailure)
            {
                Console.Error.WriteLine("Restarting from checkpoint");
                // go again
            }
        }
    };

    double d = double.Parse(args[2]);
    int numIters = int.Parse(args[3]);
    long n = store.NumUrls();
    UidState<double> oldScores = null, newScores = null;

    Checkpointed(delegate()
    {
        newScores = store.AllocateUidState<double>();
        oldScores = store.AllocateUidState<double>();
        oldScores.SetAll(uid => 1.0 / n);
    });

    for (int k = 0; k < numIters; k++)
    {
        Checkpointed(delegate()
        {
            var uidBatch = new Batch<long>(50000);
            // Resetting here makes the block restartable after a failure.
            newScores.SetAll(x => d / n);
            foreach (long u in store.Uids())
            {
                uidBatch.Add(u);
                if (uidBatch.Full || store.IsLastUid(u))
                {
                    var linkBatch = store.BatchedGetLinks(uidBatch, Dir.Fwd);
                    var newMap = new UidMap(linkBatch);
                    var oldSc = oldScores.GetMany(uidBatch);
                    var newSc = newScores.GetMany(newMap);
                    for (int i = 0; i < uidBatch.Count; i++)
                    {
                        var links = linkBatch[i];
                        double f = (1.0 - d) * oldSc[i] / links.Length;
                        foreach (var link in links)
                        {
                            newSc[newMap[link]] += f;
                        }
                    }
                    newScores.SetMany(newMap, newSc);
                    uidBatch.Reset();
                }
            }
        });

        // Swap roles: what was just computed becomes the input of the next pass.
        var tmp = newScores;
        newScores = oldScores;
        oldScores = tmp;
        Console.WriteLine("Done with iteration {0}", k);
    }

    using (var wr = new BinaryWriter(new BufferedStream(new FileStream("pr-scores.bin", FileMode.Create, FileAccess.Write))))
    {
        // Because of the swap above, the most recently computed scores are in
        // oldScores (newScores holds the input of the last iteration, or only
        // default values when no iteration ran). Writing newScores here would
        // emit results that are one iteration stale.
        foreach (var us in oldScores.GetAll())
        {
            wr.Write(us.val);
        }
    }

    Console.WriteLine("Done. {0} iterations took {1} seconds.", numIters, 0.001 * sw.ElapsedMilliseconds);
}
/// <summary>
/// Processes one batch of uids: fetches their neighbors in direction
/// <paramref name="dir"/>, and for every neighbor whose stored distance is
/// greater than <paramref name="dist"/> stores <paramref name="dist"/> as its
/// distance and the seed of the linking uid as its seed.
/// </summary>
/// <param name="shs">Store used to fetch the links.</param>
/// <param name="dists">Per-uid distance state, read and written for the neighbors.</param>
/// <param name="seeds">Per-uid seed state, read for <paramref name="uids"/> and read/written for the neighbors.</param>
/// <param name="uids">The uids whose neighbors are examined.</param>
/// <param name="dist">Distance to assign to neighbors that currently have a larger one.</param>
/// <param name="dir">Link direction to follow.</param>
private static void ProcessBatch(Store shs, UidState<byte> dists, UidState<long> seeds, long[] uids, byte dist, Dir dir)
{
    var sourceSeeds = seeds.GetMany(uids);
    var neighbors = shs.BatchedGetLinks(uids, dir);
    var neighborIndex = new UidMap(neighbors);
    var nearest = dists.GetMany(neighborIndex);
    var owner = seeds.GetMany(neighborIndex);

    for (int src = 0; src < neighbors.Length; src++)
    {
        var row = neighbors[src];
        for (int k = 0; k < row.Length; k++)
        {
            int slot = neighborIndex[row[k]];
            if (nearest[slot] <= dist)
            {
                // Already at this distance or closer; keep the existing seed.
                continue;
            }
            nearest[slot] = dist;
            owner[slot] = sourceSeeds[src];
        }
    }

    dists.SetMany(neighborIndex, nearest);
    seeds.SetMany(neighborIndex, owner);
}
/// <summary>
/// Converts a gzip-compressed temporal adjacency list into a gzip-compressed
/// text file with one bit-matrix record per source URL.
/// </summary>
/// <remarks>
/// Every input line is whitespace-separated: source URL, time stamp, then the
/// out-link URLs of that revision. Consecutive lines with the same source URL
/// form one group, which is written as a single output line (see
/// <c>WriteSourceRecord</c>). Blank lines are skipped; lines without a time
/// stamp are reported on stderr and skipped. Any other failure is reported on
/// stderr and ends the conversion.
/// </remarks>
/// <param name="args">args[0]: input .gz file; args[1]: output .gz file (overwritten).</param>
public static void Main(string[] args)
{
    if (args.Length != 2)
    {
        Console.Error.WriteLine("Usage: ConvertTemporalAdjListToBitMatrixAdjList <LinkGraph.gz> <out.bin.gz>");
        return;
    }

    char[] delimiter = { ' ', '\t' };
    try
    {
        // FileMode.Create truncates an existing output file; OpenOrCreate would
        // leave stale bytes behind whenever the new output is shorter.
        using (var rd = new StreamReader(new GZipStream(new FileStream(args[0], FileMode.Open, FileAccess.Read), CompressionMode.Decompress)))
        using (var wr = new StreamWriter(new GZipStream(new FileStream(args[1], FileMode.Create, FileAccess.Write), CompressionMode.Compress)))
        {
            string line;
            string currentSrc = "";
            var allURLs = new SortedSet<string>();
            var vector_list = new List<Revision_Links>();

            while ((line = rd.ReadLine()) != null)
            {
                var field = line.Split(delimiter, StringSplitOptions.RemoveEmptyEntries);
                if (field.Length == 0)
                {
                    continue; // blank line
                }
                if (field.Length < 2)
                {
                    Console.Error.WriteLine("Skipping line without time stamp: {0}", line);
                    continue;
                }

                // A different source URL closes the current group.
                if (currentSrc != "" && field[0] != currentSrc)
                {
                    WriteSourceRecord(wr, currentSrc, allURLs, vector_list);
                    allURLs.Clear();
                    vector_list.Clear();
                }

                var revision_Vector = new long[field.Length - 2];
                for (int i = 2; i < field.Length; i++)
                {
                    allURLs.Add(field[i]);
                    revision_Vector[i - 2] = field[i].GetHashCode();
                }
                vector_list.Add(new Revision_Links { time_Stamp = field[1], link_Vector = revision_Vector });
                currentSrc = field[0];
            }

            // The last group has no following line to trigger its output.
            if (vector_list.Count > 0)
            {
                WriteSourceRecord(wr, currentSrc, allURLs, vector_list);
            }
        }
    }
    catch (Exception e)
    {
        // Report instead of silently discarding the failure.
        Console.Error.WriteLine("Conversion failed: {0}", e.Message);
    }
}

/// <summary>
/// Writes the record of one source URL: the URL, the size of the link vector,
/// the number of revisions, the first time stamp, one time difference (whole
/// seconds to the previous revision, 0 for the first) per revision, the packed
/// revision-by-link bit matrix as decimal bytes, and the sorted out-link URLs.
/// </summary>
/// <param name="wr">Destination writer.</param>
/// <param name="src">Source URL of the group.</param>
/// <param name="allURLs">All out-link URLs seen in any revision of the group.</param>
/// <param name="revisions">The revisions of the group in input order; must not be empty.</param>
private static void WriteSourceRecord(StreamWriter wr, string src, SortedSet<string> allURLs, List<Revision_Links> revisions)
{
    // The column order of the bit matrix is the sorted order of the URLs.
    var urls = new string[allURLs.Count];
    allURLs.CopyTo(urls);
    var allLinks_Vector = new long[urls.Length];
    for (int i = 0; i < urls.Length; i++)
    {
        allLinks_Vector[i] = urls[i].GetHashCode();
    }

    // NOTE(review): links are identified by their string hash codes, so two
    // URLs with equal hash codes would share a column - confirm this is acceptable.
    UidMap allLinks_Map = new UidMap(allLinks_Vector);
    var pointers = new int[allLinks_Map.GetSize()];
    for (int i = 0; i < pointers.Length; i++)
    {
        pointers[allLinks_Map[allLinks_Vector[i]]] = i;
    }

    // One bit per (revision, link) pair, padded to a whole number of bytes.
    int columns = urls.Length;
    int bitCount = revisions.Count * columns;
    if (bitCount % 8 != 0)
    {
        bitCount += 8 - bitCount % 8;
    }
    var results = new byte[bitCount / 8];
    var time_diff = new long[revisions.Count];

    for (int i = 0; i < revisions.Count; i++)
    {
        if (i > 0)
        {
            // TotalSeconds is the full interval; Seconds would only be the
            // 0-59 seconds component of it.
            TimeSpan gap = Convert.ToDateTime(revisions[i].time_Stamp) - Convert.ToDateTime(revisions[i - 1].time_Stamp);
            time_diff[i] = (long)gap.TotalSeconds;
        }

        var links = revisions[i].link_Vector;
        for (int j = 0; j < links.Length; j++)
        {
            int slot = allLinks_Map[links[j]];
            if (slot > -1)
            {
                // Most significant bit first within each byte.
                int bit = i * columns + pointers[slot];
                results[bit / 8] |= (byte)(1 << (7 - bit % 8));
            }
        }
    }

    wr.Write("{0} ", src);                         // Source URL
    wr.Write("{0} ", urls.Length);                 // Size of link vector
    wr.Write("{0} ", revisions.Count);             // Number of revisions
    wr.Write("{0} ", revisions[0].time_Stamp);     // First time stamp
    for (int i = 0; i < time_diff.Length; i++)
    {
        wr.Write("{0} ", time_diff[i]);            // Time difference between time stamps
    }
    for (int i = 0; i < results.Length; i++)
    {
        wr.Write("{0} ", results[i]);              // Packed revision bit matrix
    }

    // Each URL is followed by a single space, as in the existing output format.
    string outlink_URLs = urls.Length == 0 ? "" : string.Join(" ", urls) + " ";
    wr.WriteLine("{0}", outlink_URLs);             // List of out URLs
}
/// <summary>
/// Reads queries from a binary file and, for each query, scores its result
/// URLs on a sampled neighborhood graph and prints the best-scoring URL.
/// </summary>
/// <remarks>
/// Each query record is: query id (Int32), URL count (Int32), then that many
/// strings. The result URLs are extended by up to <c>bs</c> sampled backward
/// and <c>fs</c> sampled forward neighbors per URL. On the links among those
/// nodes, 100 rounds of a two-step propagation are run: every node's score is
/// split evenly among the nodes linking to it, and each of those shares is
/// then split evenly among the nodes it links to (this looks like the SALSA
/// authority computation - confirm). Reading stops at the end of the file.
/// </remarks>
/// <param name="args">leader, store id (GUID), query file, bs, fs.</param>
public static void Main(string[] args)
{
    // Fail with a usage message instead of an IndexOutOfRangeException.
    if (args.Length < 5)
    {
        Console.Error.WriteLine("Usage: {0} <leader> <store> <queries.bin> <bs> <fs>", AppDomain.CurrentDomain.FriendlyName);
        return;
    }

    var shs = new Service(args[0]).OpenStore(Guid.Parse(args[1]));
    using (var rd = new BinaryReader(new BufferedStream(new FileStream(args[2], FileMode.Open, FileAccess.Read))))
    {
        int bs = int.Parse(args[3]);
        int fs = int.Parse(args[4]);
        while (true)
        {
            try
            {
                int queryId = rd.ReadInt32();
                int numUrls = rd.ReadInt32();
                var urls = new string[numUrls];
                for (int i = 0; i < numUrls; i++)
                {
                    urls[i] = rd.ReadString();
                }

                // Base set = result URLs plus sampled backward and forward neighbors.
                var uids = shs.BatchedUrlToUid(urls);
                var tbl = new UidMap(uids);
                var bwdUids = shs.BatchedSampleLinks(tbl, Dir.Bwd, bs, true);
                var fwdUids = shs.BatchedSampleLinks(tbl, Dir.Fwd, fs, true);
                foreach (long[] x in bwdUids) tbl.Add(x);
                foreach (long[] x in fwdUids) tbl.Add(x);
                long[] srcUids = tbl;
                var dstUids = shs.BatchedGetLinks(srcUids, Dir.Fwd);
                int n = dstUids.Length;

                // srcId[v]: nodes that v links to; dstId[v]: nodes that link to v.
                // Only links whose target is also in the base set are kept.
                var srcId = new List<int>[n];
                var dstId = new List<int>[n];
                for (int i = 0; i < n; i++)
                {
                    srcId[i] = new List<int>();
                    dstId[i] = new List<int>();
                }
                for (int i = 0; i < n; i++)
                {
                    int sid = tbl[srcUids[i]];
                    for (int j = 0; j < dstUids[i].Length; j++)
                    {
                        int did = tbl[dstUids[i][j]];
                        if (did != -1)
                        {
                            srcId[sid].Add(did);
                            dstId[did].Add(sid);
                        }
                    }
                }

                // Start with a uniform score on every node that has an incoming link.
                int numAuts = 0;
                for (int i = 0; i < n; i++)
                {
                    if (dstId[i].Count > 0) numAuts++;
                }
                double initAut = 1.0 / numAuts;
                var aut = new double[n];
                var tmp = new double[n];
                for (int i = 0; i < n; i++)
                {
                    aut[i] = dstId[i].Count > 0 ? initAut : 0.0;
                }

                for (int k = 0; k < 100; k++)
                {
                    // Step 1: push each score backwards along incoming links.
                    for (int u = 0; u < n; u++)
                    {
                        foreach (var id in dstId[u])
                        {
                            tmp[id] += (aut[u] / dstId[u].Count);
                        }
                        aut[u] = 0.0;
                    }
                    // Step 2: push the intermediate values forwards along outgoing links.
                    for (int u = 0; u < n; u++)
                    {
                        foreach (var id in srcId[u])
                        {
                            aut[id] += (tmp[u] / srcId[u].Count);
                        }
                        tmp[u] = 0.0;
                    }
                }

                // URLs for which the uid lookup returned -1 score 0.
                var scores = new double[urls.Length];
                for (int i = 0; i < scores.Length; i++)
                {
                    scores[i] = uids[i] == -1 ? 0.0 : aut[tbl[uids[i]]];
                }

                double bestScore = double.MinValue;
                string bestUrl = null;
                for (int i = 0; i < urls.Length; i++)
                {
                    if (scores[i] > bestScore)
                    {
                        bestScore = scores[i];
                        bestUrl = urls[i];
                    }
                }
                System.Console.Error.WriteLine("{0} {1}", queryId, bestUrl);
            }
            catch (EndOfStreamException)
            {
                // No more query records.
                break;
            }
        }
    }
}