/// <summary>
/// Loads revision records from a gzip-compressed binary file into this instance.
/// </summary>
/// <remarks>
/// Layout as consumed by the reads below: two Int64 header values (record count and
/// link count), then per record an Int64 uid, an Int32 outlink vector size, an Int32
/// revision count, one Int64 per revision, and finally one byte per (padded) matrix
/// cell. Any exception raised while reading is reported on standard error and
/// reading stops; the constructor then continues with whatever was loaded so far.
/// </remarks>
/// <param name="revision_file">Path of the gzip-compressed revision file to read.</param>
public RevisionData(string revision_file)
        {
            long[] uuid = null;
            var compressed = new GZipStream(new BufferedStream(new FileStream(revision_file, FileMode.Open, FileAccess.Read)), CompressionMode.Decompress);
            using (var rd = new BinaryReader(compressed))
            {
                // Kept outside the try block so the catch handler can report how far we got.
                int record = 0;
                try
                {
                    NUM_RECORDS = rd.ReadInt64();
                    NUM_LINKS = rd.ReadInt64();
                    fwd_revision_data = new node_revision_info[NUM_RECORDS];
                    uuid = new long[NUM_RECORDS];

                    while (record < NUM_RECORDS)
                    {
                        uuid[record] = rd.ReadInt64();
                        fwd_revision_data[record].outlink_vector_size = rd.ReadInt32();
                        fwd_revision_data[record].number_of_revision = rd.ReadInt32();

                        var linkCount = fwd_revision_data[record].outlink_vector_size;
                        var revisionCount = fwd_revision_data[record].number_of_revision;

                        // One cell per (revision, outlink) pair, rounded up to a multiple of 8.
                        var paddedCells = linkCount * revisionCount;
                        var leftover = paddedCells % 8;
                        if (leftover != 0)
                        {
                            paddedCells += (8 - leftover);
                        }

                        // NOTE(review): this reads one byte per padded cell, not one byte per
                        // 8 cells - confirm against the writer of this file format.
                        var matrix = new byte[paddedCells];
                        fwd_revision_data[record].revision_matrix = matrix;
                        var durations = new long[revisionCount];
                        fwd_revision_data[record].time_duration = durations;

                        durations[0] = rd.ReadInt64();
                        for (int rev = 1; rev < revisionCount; rev++)
                        {
                            // NOTE(review): the slot is still zero here, so this stores the raw
                            // value just read. If a running total was intended it should add
                            // durations[rev - 1] instead - confirm before changing.
                            durations[rev] += rd.ReadInt64();
                        }

                        for (int cell = 0; cell < matrix.Length; cell++)
                        {
                            matrix[cell] = rd.ReadByte();
                        }

                        record++;
                    }
                }
                catch (Exception e)
                {
                    // NOTE(review): execution continues after a failed read, so uuid may be
                    // null or only partly filled when it is used below.
                    Console.Error.WriteLine("Reading error at line: {0}", record);
                    Console.Error.WriteLine(e.Message);
                }
            }

            nodeMap = new UidMap(uuid);

            // Record, for each uid's position in nodeMap, the index of its record in the file.
            for (int slot = 0; slot < NUM_RECORDS; slot++)
            {
                pointer[nodeMap[uuid[slot]]] = slot;
            }
        }
示例#2
0
        /// <summary>
        /// Builds a new map containing every value of this map for which
        /// <paramref name="x"/>'s indexer yields -1.
        /// </summary>
        /// <param name="x">Map whose members are to be excluded (presumably -1 means "not present" - verify against the indexer).</param>
        /// <returns>A freshly created map; neither operand is modified by this method.</returns>
        public UidMap Subtract(UidMap x)
        {
            var difference = new UidMap();

            foreach (long candidate in vals)
            {
                bool foundInOther = x[candidate] != -1;
                if (foundInOther)
                {
                    continue;
                }

                difference.Add(candidate);
            }

            return difference;
        }
示例#3
0
文件: PageRank.cs 项目: pszmyd/SHS
 /// <summary>
 /// Command-line entry point: runs a fixed number of PageRank iterations over a store,
 /// keeping the previous iteration's scores in a "pr-scores-K.bin" file on disk.
 /// </summary>
 /// <param name="args">Leader name, store GUID, the constant d, and the iteration count.</param>
 public static void Main(string[] args)
 {
   if (args.Length != 4) {
     Console.Error.WriteLine("Usage: SHS.PageRank <leader> <store> <d> <iters>");
     return;
   }

   var sw = Stopwatch.StartNew();
   var store = new Service(args[0]).OpenStore(Guid.Parse(args[1]));
   double d = double.Parse(args[2]);
   int numIters = int.Parse(args[3]);
   long n = store.NumUrls();

   // Seed file for iteration 0: the value 1/n written n times.
   using (var seedWriter = new BinaryWriter(new BufferedStream(new FileStream("pr-scores-" + 0 + ".bin", FileMode.Create, FileAccess.Write)))) {
     long written = 0;
     while (written < n) {
       seedWriter.Write(1.0 / n);
       written++;
     }
   }

   var scores = store.AllocateUidState<double>();
   var uidBatch = new Batch<long>(50000);

   for (int iter = 0; iter < numIters; iter++) {
     string previousFile = "pr-scores-" + iter + ".bin";
     string nextFile = "pr-scores-" + (iter + 1) + ".bin";

     scores.SetAll(x => d / n);

     using (var rd = new BinaryReader(new BufferedStream(new FileStream(previousFile, FileMode.Open, FileAccess.Read)))) {
       foreach (long uid in store.Uids()) {
         uidBatch.Add(uid);

         // Keep accumulating until the batch is full or the final uid has been added.
         if (!uidBatch.Full && !store.IsLastUid(uid)) {
           continue;
         }

         var linkBatch = store.BatchedGetLinks(uidBatch, Dir.Fwd);
         var uniqLinks = new UidMap(linkBatch);
         var scoreArr = scores.GetMany(uniqLinks);

         foreach (var links in linkBatch) {
           // One previous-iteration score is consumed from the file per link list.
           double previousScore = rd.ReadDouble();
           double share = (1.0 - d) * previousScore / links.Length;
           foreach (var target in links) {
             scoreArr[uniqLinks[target]] += share;
           }
         }

         scores.SetMany(uniqLinks, scoreArr);
         uidBatch.Reset();
       }
     }

     using (var wr = new BinaryWriter(new BufferedStream(new FileStream(nextFile, FileMode.Create, FileAccess.Write)))) {
       foreach (var entry in scores.GetAll()) {
         wr.Write(entry.val);
       }
     }

     File.Delete(previousFile);
     Console.WriteLine("Iteration {0} complete", iter);
   }

   Console.WriteLine("Done. {0} iterations took {1} seconds.", numIters, 0.001 * sw.ElapsedMilliseconds);
 }
示例#4
0
文件: PageRankFT.cs 项目: pszmyd/SHS
    /// <summary>
    /// Command-line entry point: runs a fixed number of PageRank iterations with both
    /// score vectors held in store-side UidState objects, checkpointing after each
    /// step and retrying a step when a ServerFailure is raised. The final scores are
    /// written to "pr-scores.bin".
    /// </summary>
    /// <param name="args">Leader name, store GUID, the constant d, and the iteration count.</param>
    public static void Main(string[] args)
    {
        if (args.Length != 4) {
          Console.Error.WriteLine("Usage: SHS.PageRankFT <leader> <store> <d> <iters>");
          return;
        }

        var sw = Stopwatch.StartNew();
        var store = new Service(args[0]).OpenStore(Guid.Parse(args[1]));

        // Runs a block, then checkpoints the store. If a ServerFailure is raised by
        // either, the whole block is executed again.
        Action<Action> Checkpointed = delegate(Action checkpointedBlock) {
          while (true) {
            try {
              checkpointedBlock();
              store.Checkpoint();
              break;
            } catch (ServerFailure) {
              Console.Error.WriteLine("Restarting from checkpoint");
              // go again
            }
          }
        };

        double d = double.Parse(args[2]);
        int numIters = int.Parse(args[3]);
        long n = store.NumUrls();

        UidState<double> oldScores = null, newScores = null;

        Checkpointed(delegate() {
          newScores = store.AllocateUidState<double>();
          oldScores = store.AllocateUidState<double>();
          oldScores.SetAll(uid => 1.0 / n);
        });

        for (int k = 0; k < numIters; k++) {
          Checkpointed(delegate() {
            // The batch is created inside the block so that a retry starts empty.
            var uidBatch = new Batch<long>(50000);
            newScores.SetAll(x => d / n);
            foreach (long u in store.Uids()) {
              uidBatch.Add(u);
              if (uidBatch.Full || store.IsLastUid(u)) {
                var linkBatch = store.BatchedGetLinks(uidBatch, Dir.Fwd);
                var newMap = new UidMap(linkBatch);
                var oldSc = oldScores.GetMany(uidBatch);
                var newSc = newScores.GetMany(newMap);
                for (int i = 0; i < uidBatch.Count; i++) {
                  var links = linkBatch[i];
                  double f = (1.0 - d) * oldSc[i] / links.Length;
                  foreach (var link in links) {
                    newSc[newMap[link]] += f;
                  }
                }
                newScores.SetMany(newMap, newSc);
                uidBatch.Reset();
              }
            }
          });

          // Swap buffers: the scores just computed become the input of the next pass.
          var tmp = newScores; newScores = oldScores; oldScores = tmp;
          Console.WriteLine("Done with iteration {0}", k);
        }

        // Because every iteration ends with the swap above, the most recent scores
        // are in oldScores; newScores holds the values from one iteration earlier
        // (or nothing meaningful when no iteration ran). Write oldScores.
        using (var wr = new BinaryWriter(new BufferedStream(new FileStream("pr-scores.bin", FileMode.Create, FileAccess.Write)))) {
          foreach (var us in oldScores.GetAll()) wr.Write(us.val);
        }
        Console.WriteLine("Done. {0} iterations took {1} seconds.", numIters, 0.001 * sw.ElapsedMilliseconds);
    }
示例#5
0
文件: ASP.cs 项目: pszmyd/SHS
 /// <summary>
 /// For every neighbor of the given uids (in direction <paramref name="dir"/>) whose
 /// stored distance is greater than <paramref name="dist"/>, lowers the distance to
 /// <paramref name="dist"/> and copies over the seed of the uid it was reached from.
 /// </summary>
 /// <param name="shs">Store used to fetch the neighbor lists.</param>
 /// <param name="dists">Per-uid distance state, read and written back for the neighbors.</param>
 /// <param name="seeds">Per-uid seed state, read for the uids and written back for the neighbors.</param>
 /// <param name="uids">Uids whose neighbors are processed.</param>
 /// <param name="dist">Distance value to assign to neighbors that currently have a larger one.</param>
 /// <param name="dir">Link direction to follow.</param>
 private static void ProcessBatch(Store shs, UidState<byte> dists, UidState<long> seeds, long[] uids, byte dist, Dir dir)
 {
   var sourceSeeds = seeds.GetMany(uids);
   var neighborLists = shs.BatchedGetLinks(uids, dir);
   var neighborMap = new UidMap(neighborLists);
   var neighborDists = dists.GetMany(neighborMap);
   var neighborSeeds = seeds.GetMany(neighborMap);

   for (int src = 0; src < neighborLists.Length; src++) {
     foreach (var neighbor in neighborLists[src]) {
       int slot = neighborMap[neighbor];

       // Already at this distance or closer: leave its distance and seed alone.
       if (neighborDists[slot] <= dist) {
         continue;
       }

       neighborDists[slot] = dist;
       neighborSeeds[slot] = sourceSeeds[src];
     }
   }

   dists.SetMany(neighborMap, neighborDists);
   seeds.SetMany(neighborMap, neighborSeeds);
 }
示例#6
0
        /// <summary>
        /// Command-line entry point: converts a gzip-compressed temporal adjacency list
        /// into one text record per source URL containing a packed revision bit matrix.
        /// </summary>
        /// <remarks>
        /// Each input line is "source timestamp [outlink ...]", separated by spaces or
        /// tabs. Consecutive lines with the same source form one group, and one output
        /// record is written per group. Lines with fewer than two fields are skipped.
        /// Failures are reported on standard error and set a non-zero exit code.
        /// </remarks>
        /// <param name="args">Input path (gzip text) and output path (gzip text).</param>
        public static void Main(string[] args)
        {
            if (args.Length != 2)
            {
                Console.Error.WriteLine("Usage: ConvertTemporalAdjListToBitMatrixAdjList <LinkGraph.gz> <out.bin.gz>");
                return;
            }

            char[] delimiter = { ' ', '\t' };
            try
            {
                // FileMode.Create truncates an existing output file; OpenOrCreate would leave
                // stale bytes behind whenever the new output is shorter than the old one.
                using (var rd = new StreamReader(new GZipStream(new FileStream(args[0], FileMode.Open, FileAccess.Read), CompressionMode.Decompress)))
                using (var wr = new StreamWriter(new GZipStream(new FileStream(args[1], FileMode.Create, FileAccess.Write), CompressionMode.Compress)))
                {
                    string line;
                    string currentSrc = "";
                    var allURLs = new SortedSet<string>();
                    var vector_list = new List<Revision_Links>();

                    while ((line = rd.ReadLine()) != null)
                    {
                        var field = line.Split(delimiter, StringSplitOptions.RemoveEmptyEntries);
                        if (field.Length < 2)
                        {
                            // Blank or truncated line: no source/timestamp pair to record.
                            continue;
                        }

                        if (field[0] != currentSrc && currentSrc != "")
                        {
                            // A new source starts here: emit the group collected so far.
                            WriteSourceRecord(wr, currentSrc, allURLs, vector_list);
                            allURLs.Clear();
                            vector_list.Clear();
                        }

                        var revision_Vector = new long[field.Length - 2];
                        for (int i = 2; i < field.Length; i++)
                        {
                            allURLs.Add(field[i]);
                            revision_Vector[i - 2] = field[i].GetHashCode();
                        }

                        vector_list.Add(new Revision_Links {
                            time_Stamp = field[1], link_Vector = revision_Vector
                        });
                        currentSrc = field[0];
                    }

                    // The final group has no following line to trigger its emission.
                    if (currentSrc != "" && vector_list.Count > 0)
                    {
                        WriteSourceRecord(wr, currentSrc, allURLs, vector_list);
                    }
                }
            }
            catch (Exception e)
            {
                Console.Error.WriteLine("Conversion failed: {0}", e.Message);
                Environment.ExitCode = 1;
            }
        }

        /// <summary>
        /// Writes one output record for a source URL: source, outlink count, revision
        /// count, first timestamp, per-revision time differences in seconds, the packed
        /// bit matrix bytes, and the sorted outlink URLs.
        /// </summary>
        /// <param name="wr">Destination writer.</param>
        /// <param name="src">Source URL of the group.</param>
        /// <param name="allURLs">Every outlink URL seen in any revision of the group; not modified.</param>
        /// <param name="revisions">Revisions of the group in input order; must not be empty.</param>
        private static void WriteSourceRecord(StreamWriter wr, string src, SortedSet<string> allURLs, List<Revision_Links> revisions)
        {
            // Sorted snapshot of the outlinks; a URL's position here is its matrix column.
            var urls = new string[allURLs.Count];
            allURLs.CopyTo(urls);

            // NOTE(review): links are identified by string.GetHashCode(), so two URLs with
            // the same hash share an identity here - confirm this is acceptable.
            var allLinks_Vector = new long[urls.Length];
            for (int i = 0; i < urls.Length; i++)
            {
                allLinks_Vector[i] = urls[i].GetHashCode();
            }

            UidMap allLinks_Map = new UidMap(allLinks_Vector);
            var pointers = new int[allLinks_Map.GetSize()];
            for (int i = 0; i < pointers.Length; i++)
            {
                pointers[allLinks_Map[allLinks_Vector[i]]] = i;
            }

            // One bit per (revision, outlink) pair, packed most-significant-bit first and
            // padded with zero bits up to a whole number of bytes.
            int bitCount = revisions.Count * urls.Length;
            var results = new byte[(bitCount + 7) / 8];
            var time_diff = new long[revisions.Count];

            for (int i = 0; i < revisions.Count; i++)
            {
                if (i > 0)
                {
                    // Whole elapsed seconds since the previous revision. TimeSpan.Seconds
                    // would only give the 0-59 seconds component of the difference.
                    var elapsed = Convert.ToDateTime(revisions[i].time_Stamp) - Convert.ToDateTime(revisions[i - 1].time_Stamp);
                    time_diff[i] = (long)elapsed.TotalSeconds;
                }

                int base_index = i * urls.Length;
                foreach (long linkHash in revisions[i].link_Vector)
                {
                    int slot = allLinks_Map[linkHash];
                    if (slot > -1)
                    {
                        int bit = base_index + pointers[slot];
                        results[bit >> 3] |= (byte)(1 << (7 - (bit & 7)));
                    }
                }
            }

            wr.Write("{0} ", src);                       // Source URL
            wr.Write("{0} ", urls.Length);               // Size of link vector
            wr.Write("{0} ", revisions.Count);           // Number of revisions
            wr.Write("{0} ", revisions[0].time_Stamp);   // First time stamp

            for (int i = 0; i < time_diff.Length; i++)
            {
                wr.Write("{0} ", time_diff[i]);          // Time difference between time stamps
            }

            for (int i = 0; i < results.Length; i++)
            {
                wr.Write("{0} ", results[i]);            // Packed revision bit matrix
            }

            foreach (string url in urls)
            {
                wr.Write("{0} ", url);                   // Out URLs, each followed by a space
            }
            wr.WriteLine();
        }
示例#7
0
文件: Salsa.cs 项目: pszmyd/SHS
 /// <summary>
 /// Command-line entry point: for each query in a binary query file, builds a
 /// neighborhood graph around the query's result URLs, runs a fixed number of
 /// SALSA-style authority-score iterations on it, and prints the query id with the
 /// highest-scoring result URL to standard error.
 /// </summary>
 /// <remarks>
 /// The query file is read as repeated records of: Int32 query id, Int32 URL count,
 /// then that many length-prefixed strings. Processing stops at end of stream.
 /// </remarks>
 /// <param name="args">Leader name, store GUID, query file path, backward sample size, forward sample size.</param>
 public static void Main(string[] args)
 {
   if (args.Length != 5) {
     Console.Error.WriteLine("Usage: SHS.Salsa <leader> <store> <queries.bin> <bs> <fs>");
     return;
   }

   const int NumIterations = 100;

   var shs = new Service(args[0]).OpenStore(Guid.Parse(args[1]));
   using (var rd = new BinaryReader(new BufferedStream(new FileStream(args[2], FileMode.Open, FileAccess.Read)))) {
     int bs = int.Parse(args[3]);
     int fs = int.Parse(args[4]);
     while (true) {
       try {
         int queryId = rd.ReadInt32();
         int numUrls = rd.ReadInt32();
         var urls = new string[numUrls];
         for (int i = 0; i < numUrls; i++) urls[i] = rd.ReadString();

         // Root set: the result URLs, extended with sampled backward and forward neighbors.
         var uids = shs.BatchedUrlToUid(urls);
         var tbl = new UidMap(uids);
         var bwdUids = shs.BatchedSampleLinks(tbl, Dir.Bwd, bs, true);
         var fwdUids = shs.BatchedSampleLinks(tbl, Dir.Fwd, fs, true);
         foreach (long[] x in bwdUids) tbl.Add(x);
         foreach (long[] x in fwdUids) tbl.Add(x);
         long[] srcUids = tbl;
         var dstUids = shs.BatchedGetLinks(srcUids, Dir.Fwd);
         int n = dstUids.Length;

         // Adjacency lists restricted to vertices that are in tbl:
         // srcId[v] = targets of v's out-links, dstId[v] = sources of v's in-links.
         var srcId = new List<int>[n];
         var dstId = new List<int>[n];
         for (int i = 0; i < n; i++) {
           srcId[i] = new List<int>();
           dstId[i] = new List<int>();
         }
         for (int i = 0; i < n; i++) {
           int sid = tbl[srcUids[i]];
           for (int j = 0; j < dstUids[i].Length; j++) {
             int did = tbl[dstUids[i][j]];
             if (did != -1) {
               srcId[sid].Add(did);
               dstId[did].Add(sid);
             }
           }
         }

         // Start with equal scores on every vertex that has at least one in-link.
         int numAuts = 0;
         for (int i = 0; i < n; i++) {
           if (dstId[i].Count > 0) numAuts++;
         }
         double initAut = 1.0 / numAuts;
         var aut = new double[n];
         var tmp = new double[n];
         for (int i = 0; i < n; i++) {
           aut[i] = dstId[i].Count > 0 ? initAut : 0.0;
         }

         for (int k = 0; k < NumIterations; k++) {
           // Step 1: each vertex splits its score evenly among the sources of its in-links.
           for (int u = 0; u < n; u++) {
             foreach (var id in dstId[u]) {
               tmp[id] += (aut[u] / dstId[u].Count);
             }
             aut[u] = 0.0;
           }
           // Step 2: each vertex splits what it received evenly among its out-link targets.
           for (int u = 0; u < n; u++) {
             foreach (var id in srcId[u]) {
               aut[id] += (tmp[u] / srcId[u].Count);
             }
             tmp[u] = 0.0;
           }
         }

         // URLs whose uid lookup returned -1 get a score of zero.
         var scores = new double[urls.Length];
         for (int i = 0; i < scores.Length; i++) {
           scores[i] = uids[i] == -1 ? 0.0 : aut[tbl[uids[i]]];
         }
         double bestScore = double.MinValue;
         string bestUrl = null;
         for (int i = 0; i < urls.Length; i++) {
           if (scores[i] > bestScore) {
             bestScore = scores[i];
             bestUrl = urls[i];
           }
         }
         System.Console.Error.WriteLine("{0} {1}", queryId, bestUrl);
       } catch (EndOfStreamException) {
         // BinaryReader signals the end of the query file by throwing.
         break;
       }
     }
   }
 }