/// <summary>
/// Converts a gzip-compressed text file of temporal links into a forward and a
/// backward adjacency list keyed by UID.
/// </summary>
/// <remarks>
/// Each input line is split on spaces and tabs. Fields 0 and 1 are URLs (passed
/// through <c>getNormalURLFormat</c>); field 2 is a date to which "-28" is appended
/// before parsing (presumably a year-month value - confirm against the data).
/// Link times are stored as whole days since 1998-01-01. URLs are resolved in
/// batches through <c>store.BatchedUrlToUid</c>; a link is skipped when either
/// endpoint resolves to -1.
/// The forward list is written to <paramref name="args1"/> and the backward list to
/// <c>"Bwd_" + args1</c>, both gzip-compressed. Each file starts with the total
/// number of links, followed by one line per (node, time) group:
/// node, time, then the neighbour UIDs, all tab-separated.
/// Any exception is reported on standard error and the counts reached so far are
/// returned.
/// </remarks>
/// <param name="store">Store used to resolve URLs to UIDs.</param>
/// <param name="args0">Path of the gzip-compressed input file.</param>
/// <param name="args1">Path of the gzip-compressed forward output file.</param>
/// <returns>
/// Key: number of group lines written to the forward file.
/// Value: number of group lines written to the backward file.
/// </returns>
public static KeyValuePair<long, long> ConvertSURT2AdjListByUid(Store store, string args0, string args1)
{
    const int BatchSize = 100000;

    char[] Sep = { ' ', '\t' };
    var d0 = Convert.ToDateTime("1998-01-01");
    long num_links = 0;
    long fwd_num_list = 0;
    long bwd_num_list = 0;
    string line = "";
    string[] cache = new string[2 * BatchSize];   // [2*i] = source URL, [2*i+1] = destination URL
    long[] temporal = new long[BatchSize];        // [i] = link time in days since d0
    int current_count = 0;                        // number of links currently buffered

    // The backward sorter outlives the forward pass, so it gets its own using scope;
    // this guarantees it is disposed on every path.
    using (var bwd_sorter = new DiskSorter<TemporalNodeIdLinks>(new TemporalNodeIdLinks.Comparer(), TemporalNodeIdLinks.Write, TemporalNodeIdLinks.Read, 1 << 25))
    {
        try
        {
            // FileMode.Create truncates an existing output file, so no stale bytes
            // from a previous, longer run are left behind the new gzip stream.
            using (var rd = new StreamReader(new GZipStream(new FileStream(args0, FileMode.Open, FileAccess.Read), CompressionMode.Decompress)))
            using (var wr = new StreamWriter(new GZipStream(new FileStream(args1, FileMode.Create, FileAccess.Write), CompressionMode.Compress)))
            using (var sorter = new DiskSorter<TemporalNodeIdLinks>(new TemporalNodeIdLinks.Comparer(), TemporalNodeIdLinks.Write, TemporalNodeIdLinks.Read, 1 << 25))
            {
                bool endOfInput = false;
                while (!endOfInput)
                {
                    line = rd.ReadLine();
                    if (line == null)
                    {
                        endOfInput = true;
                    }
                    else
                    {
                        string[] links = line.Split(Sep, StringSplitOptions.RemoveEmptyEntries);
                        temporal[current_count] = (Convert.ToDateTime(links[2] + "-28") - d0).Days;
                        cache[2 * current_count] = getNormalURLFormat(links[0]);
                        cache[2 * current_count + 1] = getNormalURLFormat(links[1]);
                        current_count++;
                    }

                    // Flush when the buffer is full, and once more at end of input for
                    // whatever is left. Every buffered link occupies two URL slots.
                    bool batchFull = current_count == BatchSize;
                    if (batchFull || (endOfInput && current_count > 0))
                    {
                        string[] urls = cache;
                        if (!batchFull)
                        {
                            urls = new string[2 * current_count];
                            Array.Copy(cache, urls, urls.Length);
                        }

                        long[] out_link = store.BatchedUrlToUid(urls);
                        for (int i = 0; i < current_count; i++)
                        {
                            long sourceUid = out_link[2 * i];
                            long destUid = out_link[2 * i + 1];
                            if (sourceUid != -1 && destUid != -1)
                            {
                                sorter.Add(new TemporalNodeIdLinks { source_node = sourceUid, time = temporal[i], dest_node = destUid });
                                bwd_sorter.Add(new TemporalNodeIdLinks { source_node = destUid, time = temporal[i], dest_node = sourceUid });
                            }
                        }

                        if (batchFull)
                        {
                            Console.WriteLine("Next {0} links finished...", BatchSize);
                        }
                        current_count = 0;
                    }
                }

                sorter.Sort();
                num_links = sorter.Total;
                Console.WriteLine("{0} links", num_links);

                // NOTE(review): the record returned by the final Get() is only written if
                // AtEnd() is still false afterwards, and Get() is called even when nothing
                // was added. Confirm DiskSorter.AtEnd()/Get() semantics.
                TemporalNodeIdLinks temporalFwdLink = sorter.Get();
                var current = temporalFwdLink;
                wr.WriteLine(num_links);
                while (!sorter.AtEnd())
                {
                    wr.Write(current.source_node + "\t" + current.time + "\t");
                    while ((current.source_node == temporalFwdLink.source_node) && (current.time == temporalFwdLink.time) && !sorter.AtEnd())
                    {
                        wr.Write(temporalFwdLink.dest_node + "\t");
                        temporalFwdLink = sorter.Get();
                    }
                    // Consider changing to UUIDs here.
                    wr.WriteLine();
                    current = temporalFwdLink;
                    fwd_num_list++;
                }
                Console.WriteLine("Finished build links to adjacency list by string...");
            }

            using (var wr = new StreamWriter(new GZipStream(new FileStream("Bwd_" + args1, FileMode.Create, FileAccess.Write), CompressionMode.Compress)))
            {
                bwd_sorter.Sort();
                Console.WriteLine("{0} links", bwd_sorter.Total);

                // Same grouping walk as the forward pass (see the review note there).
                TemporalNodeIdLinks temporalFwdLink = bwd_sorter.Get();
                TemporalNodeIdLinks current = temporalFwdLink;
                wr.WriteLine(num_links);
                while (!bwd_sorter.AtEnd())
                {
                    wr.Write(current.source_node + "\t" + current.time + "\t");
                    while ((current.source_node == temporalFwdLink.source_node) && (current.time == temporalFwdLink.time) && !bwd_sorter.AtEnd())
                    {
                        wr.Write(temporalFwdLink.dest_node + "\t");
                        temporalFwdLink = bwd_sorter.Get();
                    }
                    // Consider changing to UUIDs here.
                    wr.WriteLine();
                    current = temporalFwdLink;
                    bwd_num_list++;
                }
            }
        }
        catch (Exception e)
        {
            // Best effort: report the failure and the last line read, then fall
            // through and return the counts reached so far.
            Console.Error.WriteLine(e.Message);
            Console.Error.WriteLine(e.Source);
            Console.Error.WriteLine(e.StackTrace);
            Console.Error.WriteLine(line);
        }
    }

    return new KeyValuePair<long, long>(fwd_num_list, bwd_num_list);
}
/// <summary>
/// Converts a gzip-compressed text file of links with multiple timestamps into a
/// forward and a backward adjacency list keyed by UID.
/// </summary>
/// <remarks>
/// Each input line is split on spaces and tabs. Fields 0 and 1 are the source and
/// destination URLs; field 2 and the following fields up to, but not including,
/// the last one are parsed as dates. Link times are stored as whole days since
/// 1998-01-01, and one link is emitted per timestamp.
/// NOTE(review): the last field of a line is never parsed as a date; presumably it
/// holds something else - confirm against the input format.
/// The distinct URLs of each batch are resolved through
/// <c>store.BatchedUrlToUid</c>; a link is skipped when either endpoint is
/// unresolved or resolves to -1.
/// The forward list is written to <paramref name="args1"/> and the backward list to
/// <c>"Bwd_" + args1</c>, both gzip-compressed. Each file starts with the total
/// number of links, followed by one line per (node, time) group:
/// node, time, then the neighbour UIDs, all tab-separated.
/// Any exception is reported on standard error and the counts reached so far are
/// returned.
/// </remarks>
/// <param name="store">Store used to resolve URLs to UIDs.</param>
/// <param name="args0">Path of the gzip-compressed input file.</param>
/// <param name="args1">Path of the gzip-compressed forward output file.</param>
/// <returns>
/// Key: number of group lines written to the forward file.
/// Value: number of group lines written to the backward file.
/// </returns>
public static KeyValuePair<long, long> ConvertTempLinks2AdjListByUid(Store store, string args0, string args1)
{
    const int BatchSize = 10000000;

    char[] Sep = { ' ', '\t' };
    var d0 = Convert.ToDateTime("1998-01-01");
    long num_links = 0;
    long fwd_num_list = 0;
    long bwd_num_list = 0;
    string line = "";
    string[] cache = new string[2 * BatchSize];   // [2*i] = source URL, [2*i+1] = destination URL
    long[][] temporal = new long[BatchSize][];    // [i] = timestamps of link i, in days since d0
    int current_count = 0;                        // number of links currently buffered
    SortedSet<string> url_set = new SortedSet<string>();
    SortedDictionary<string, long> mapped = new SortedDictionary<string, long>();

    // The backward sorter outlives the forward pass, so it gets its own using scope;
    // this guarantees it is disposed on every path.
    using (var bwd_sorter = new DiskSorter<TemporalNodeIdLinks>(new TemporalNodeIdLinks.Comparer(), TemporalNodeIdLinks.Write, TemporalNodeIdLinks.Read, 1 << 25))
    {
        try
        {
            // FileMode.Create truncates an existing output file, so no stale bytes
            // from a previous, longer run are left behind the new gzip stream.
            using (var rd = new StreamReader(new GZipStream(new FileStream(args0, FileMode.Open, FileAccess.Read), CompressionMode.Decompress)))
            using (var wr = new StreamWriter(new GZipStream(new FileStream(args1, FileMode.Create, FileAccess.Write), CompressionMode.Compress)))
            using (var sorter = new DiskSorter<TemporalNodeIdLinks>(new TemporalNodeIdLinks.Comparer(), TemporalNodeIdLinks.Write, TemporalNodeIdLinks.Read, 1 << 25))
            {
                bool endOfInput = false;
                while (!endOfInput)
                {
                    line = rd.ReadLine();
                    if (line == null)
                    {
                        endOfInput = true;
                    }
                    else
                    {
                        string[] links = line.Split(Sep, StringSplitOptions.RemoveEmptyEntries);

                        // Size the array by the number of timestamp tokens, not by the
                        // character length of the line: unused zero-filled slots would
                        // otherwise be emitted as links at time 0.
                        long[] stamps = new long[Math.Max(1, links.Length - 3)];
                        stamps[0] = (Convert.ToDateTime(links[2]) - d0).Days;
                        for (int t = 3; t < links.Length - 1; t++)
                        {
                            stamps[t - 2] = (Convert.ToDateTime(links[t]) - d0).Days;
                        }

                        temporal[current_count] = stamps;
                        cache[2 * current_count] = links[0];
                        cache[2 * current_count + 1] = links[1];
                        url_set.Add(links[0]);
                        url_set.Add(links[1]);
                        current_count++;
                    }

                    // Flush when the buffer is full, and once more at end of input for
                    // whatever is left (all current_count buffered links, none dropped).
                    if (current_count == BatchSize || (endOfInput && current_count > 0))
                    {
                        string[] unique_urls = new string[url_set.Count];
                        url_set.CopyTo(unique_urls);
                        long[] uids = store.BatchedUrlToUid(unique_urls);
                        for (int i = 0; i < unique_urls.Length; i++)
                        {
                            try
                            {
                                mapped.Add(unique_urls[i], uids[i]);
                            }
                            catch (Exception)
                            {
                                Console.WriteLine("Error mapping...");
                            }
                        }

                        for (int i = 0; i < current_count; i++)
                        {
                            long mSource, mDest;
                            // A failed lookup leaves the out value at 0, which would pass
                            // the -1 filter; require the lookup itself to succeed.
                            if (mapped.TryGetValue(cache[2 * i], out mSource)
                                && mapped.TryGetValue(cache[2 * i + 1], out mDest)
                                && mSource != -1 && mDest != -1)
                            {
                                for (int t = 0; t < temporal[i].Length; t++)
                                {
                                    sorter.Add(new TemporalNodeIdLinks { source_node = mSource, time = temporal[i][t], dest_node = mDest });
                                    bwd_sorter.Add(new TemporalNodeIdLinks { source_node = mDest, time = temporal[i][t], dest_node = mSource });
                                }
                            }
                        }

                        Console.WriteLine("Next {0} links finished...", current_count);
                        current_count = 0;
                        url_set.Clear();
                        mapped.Clear();
                    }
                }

                sorter.Sort();
                num_links = sorter.Total;
                Console.WriteLine("{0} links", num_links);

                // NOTE(review): the record returned by the final Get() is only written if
                // AtEnd() is still false afterwards, and Get() is called even when nothing
                // was added. Confirm DiskSorter.AtEnd()/Get() semantics.
                TemporalNodeIdLinks temporalFwdLink = sorter.Get();
                var current = temporalFwdLink;
                wr.WriteLine(num_links);
                while (!sorter.AtEnd())
                {
                    wr.Write(current.source_node + "\t" + current.time + "\t");
                    while ((current.source_node == temporalFwdLink.source_node) && (current.time == temporalFwdLink.time) && !sorter.AtEnd())
                    {
                        wr.Write(temporalFwdLink.dest_node + "\t");
                        temporalFwdLink = sorter.Get();
                    }
                    // Consider changing to UUIDs here.
                    wr.WriteLine();
                    current = temporalFwdLink;
                    fwd_num_list++;
                }
                Console.WriteLine("Finished build links to adjacency list by string...");
            }

            using (var wr = new StreamWriter(new GZipStream(new FileStream("Bwd_" + args1, FileMode.Create, FileAccess.Write), CompressionMode.Compress)))
            {
                bwd_sorter.Sort();
                Console.WriteLine("{0} links", bwd_sorter.Total);

                // Same grouping walk as the forward pass (see the review note there).
                TemporalNodeIdLinks temporalFwdLink = bwd_sorter.Get();
                TemporalNodeIdLinks current = temporalFwdLink;
                wr.WriteLine(num_links);
                while (!bwd_sorter.AtEnd())
                {
                    wr.Write(current.source_node + "\t" + current.time + "\t");
                    while ((current.source_node == temporalFwdLink.source_node) && (current.time == temporalFwdLink.time) && !bwd_sorter.AtEnd())
                    {
                        wr.Write(temporalFwdLink.dest_node + "\t");
                        temporalFwdLink = bwd_sorter.Get();
                    }
                    // Consider changing to UUIDs here.
                    wr.WriteLine();
                    current = temporalFwdLink;
                    bwd_num_list++;
                }
            }
        }
        catch (Exception e)
        {
            // Best effort: report the failure and the last line read, then fall
            // through and return the counts reached so far.
            Console.Error.WriteLine(e.Message);
            Console.Error.WriteLine(e.Source);
            Console.Error.WriteLine(e.StackTrace);
            Console.Error.WriteLine(line);
        }
    }

    return new KeyValuePair<long, long>(fwd_num_list, bwd_num_list);
}
/// <summary>
/// Variant of the temporal-link conversion that resolves URLs through a mapping
/// built up front by <c>buildMapping(store, args0)</c> instead of per-batch lookups.
/// </summary>
/// <remarks>
/// Each input line is split on spaces and tabs. Fields 0 and 1 are the source and
/// destination URLs; field 2 and the following fields up to, but not including,
/// the last one are parsed as dates, matching <c>ConvertTempLinks2AdjListByUid</c>.
/// Link times are stored as whole days since 1998-01-01, and one link is emitted
/// per timestamp. Lines with fewer than three fields are skipped, as are links
/// whose source or destination is missing from the mapping or mapped to -1.
/// NOTE(review): the last field of a line is never parsed as a date; presumably it
/// holds something else - confirm against the input format.
/// The forward list is written to <paramref name="args1"/> and the backward list to
/// <c>"Bwd_" + args1</c>, both gzip-compressed. Each file starts with the total
/// number of links, followed by one line per (node, time) group:
/// node, time, then the neighbour UIDs, all tab-separated.
/// Exceptions raised after the mapping has been built are reported on standard
/// error and the counts reached so far are returned.
/// </remarks>
/// <param name="store">Store handed to <c>buildMapping</c>.</param>
/// <param name="args0">Path of the gzip-compressed input file.</param>
/// <param name="args1">Path of the gzip-compressed forward output file.</param>
/// <returns>
/// Key: number of group lines written to the forward file.
/// Value: number of group lines written to the backward file.
/// </returns>
public static KeyValuePair<long, long> AttempConvertTempLinks2AdjListByUid(Store store, string args0, string args1)
{
    char[] Sep = { ' ', '\t' };
    var d0 = Convert.ToDateTime("1998-01-01");
    long num_links = 0;
    long fwd_num_list = 0;
    long bwd_num_list = 0;
    string line = "";
    SortedDictionary<string, long> mapped = buildMapping(store, args0);

    // The backward sorter outlives the forward pass, so it gets its own using scope;
    // this guarantees it is disposed on every path.
    using (var bwd_sorter = new DiskSorter<TemporalNodeIdLinks>(new TemporalNodeIdLinks.Comparer(), TemporalNodeIdLinks.Write, TemporalNodeIdLinks.Read, 1 << 25))
    {
        try
        {
            // FileMode.Create truncates an existing output file, so no stale bytes
            // from a previous, longer run are left behind the new gzip stream.
            using (var rd = new StreamReader(new GZipStream(new FileStream(args0, FileMode.Open, FileAccess.Read), CompressionMode.Decompress)))
            using (var wr = new StreamWriter(new GZipStream(new FileStream(args1, FileMode.Create, FileAccess.Write), CompressionMode.Compress)))
            using (var sorter = new DiskSorter<TemporalNodeIdLinks>(new TemporalNodeIdLinks.Comparer(), TemporalNodeIdLinks.Write, TemporalNodeIdLinks.Read, 1 << 25))
            {
                while ((line = rd.ReadLine()) != null)
                {
                    string[] links = line.Split(Sep, StringSplitOptions.RemoveEmptyEntries);
                    if (links.Length < 3)
                    {
                        // No timestamp on this line, so there is nothing to emit.
                        continue;
                    }

                    // Resolve both endpoints once per line. The destination is field 1;
                    // looking up field 0 twice would turn every link into a self-loop.
                    long src, dest;
                    if (!mapped.TryGetValue(links[0], out src)
                        || !mapped.TryGetValue(links[1], out dest)
                        || src == -1 || dest == -1)
                    {
                        continue;
                    }

                    // Timestamps are field 2 plus fields 3 .. Length-2, the same range the
                    // batched converter reads.
                    int lastStamp = Math.Max(2, links.Length - 2);
                    for (int t = 2; t <= lastStamp; t++)
                    {
                        long temporal = (Convert.ToDateTime(links[t]) - d0).Days;
                        sorter.Add(new TemporalNodeIdLinks { source_node = src, time = temporal, dest_node = dest });
                        bwd_sorter.Add(new TemporalNodeIdLinks { source_node = dest, time = temporal, dest_node = src });
                    }
                }

                sorter.Sort();
                num_links = sorter.Total;
                Console.WriteLine("{0} links", num_links);

                // NOTE(review): the record returned by the final Get() is only written if
                // AtEnd() is still false afterwards, and Get() is called even when nothing
                // was added. Confirm DiskSorter.AtEnd()/Get() semantics.
                TemporalNodeIdLinks temporalFwdLink = sorter.Get();
                var current = temporalFwdLink;
                wr.WriteLine(num_links);
                while (!sorter.AtEnd())
                {
                    wr.Write(current.source_node + "\t" + current.time + "\t");
                    while ((current.source_node == temporalFwdLink.source_node) && (current.time == temporalFwdLink.time) && !sorter.AtEnd())
                    {
                        wr.Write(temporalFwdLink.dest_node + "\t");
                        temporalFwdLink = sorter.Get();
                    }
                    // Consider changing to UUIDs here.
                    wr.WriteLine();
                    current = temporalFwdLink;
                    fwd_num_list++;
                }
                Console.WriteLine("Finished build links to adjacency list by string...");
            }

            using (var wr = new StreamWriter(new GZipStream(new FileStream("Bwd_" + args1, FileMode.Create, FileAccess.Write), CompressionMode.Compress)))
            {
                bwd_sorter.Sort();
                Console.WriteLine("{0} links", bwd_sorter.Total);

                // Same grouping walk as the forward pass (see the review note there).
                TemporalNodeIdLinks temporalFwdLink = bwd_sorter.Get();
                TemporalNodeIdLinks current = temporalFwdLink;
                wr.WriteLine(num_links);
                while (!bwd_sorter.AtEnd())
                {
                    wr.Write(current.source_node + "\t" + current.time + "\t");
                    while ((current.source_node == temporalFwdLink.source_node) && (current.time == temporalFwdLink.time) && !bwd_sorter.AtEnd())
                    {
                        wr.Write(temporalFwdLink.dest_node + "\t");
                        temporalFwdLink = bwd_sorter.Get();
                    }
                    // Consider changing to UUIDs here.
                    wr.WriteLine();
                    current = temporalFwdLink;
                    bwd_num_list++;
                }
            }
        }
        catch (Exception e)
        {
            // Best effort: report the failure and the last line read, then fall
            // through and return the counts reached so far.
            Console.Error.WriteLine(e.Message);
            Console.Error.WriteLine(e.Source);
            Console.Error.WriteLine(e.StackTrace);
            Console.Error.WriteLine(line);
        }
    }

    return new KeyValuePair<long, long>(fwd_num_list, bwd_num_list);
}
/// <summary>
/// Writes one link record to <paramref name="wr"/>.
/// </summary>
/// <remarks>
/// The fields are emitted as destination node, time, source node, in that order.
/// NOTE(review): presumably the matching reader consumes them in the same order - confirm.
/// </remarks>
/// <param name="wr">Binary writer that receives the record.</param>
/// <param name="a">Link whose fields are written.</param>
internal static void Write(BinaryWriter wr, TemporalNodeIdLinks a)
{
    var destination = a.dest_node;
    var timestamp = a.time;
    var origin = a.source_node;

    wr.Write(destination);
    wr.Write(timestamp);
    wr.Write(origin);
}