예제 #1
0
 /// <summary>
 /// Registers <paramref name="uid"/> in <c>UidMap</c> under the value returned by
 /// <c>NextIndex()</c>, unless the map already contains that key.
 /// </summary>
 /// <param name="uid">Identifier to register.</param>
 private void MapUid(Guid uid)
 {
     // Already registered: keep the existing entry and do not call NextIndex().
     if (UidMap.ContainsKey(uid))
     {
         return;
     }

     UidMap[uid] = NextIndex();
 }
예제 #2
0
 /// <summary>
 /// Command-line driver that runs an iterative PageRank-style computation over an SHS store.
 /// The score vector of iteration k is kept on disk in "pr-scores-k.bin"; each iteration
 /// reads that file, accumulates contributions into a UID state, writes file k+1 and
 /// deletes file k.
 /// </summary>
 /// <param name="args">
 /// Exactly four values: leader, store GUID, the factor <c>d</c> and the iteration count.
 /// Any other count prints the usage line to standard error and does nothing else.
 /// </param>
 public static void Main(string[] args)
 {
     if (args.Length != 4)
     {
         Console.Error.WriteLine("Usage: SHS.PageRank <leader> <store> <d> <iters>");
         return;
     }

     var    timer    = Stopwatch.StartNew();
     var    store    = new Service(args[0]).OpenStore(Guid.Parse(args[1]));
     double d        = double.Parse(args[2]);
     int    numIters = int.Parse(args[3]);
     long   n        = store.NumUrls();

     // Seed file for iteration 0: n copies of the uniform score 1/n.
     using (var seedWriter = new BinaryWriter(new BufferedStream(new FileStream("pr-scores-" + 0 + ".bin", FileMode.Create, FileAccess.Write))))
     {
         long written = 0;
         while (written < n)
         {
             seedWriter.Write(1.0 / n);
             written++;
         }
     }

     var scores  = store.AllocateUidState <double>();
     var pending = new Batch <long>(50000);

     for (int k = 0; k < numIters; k++)
     {
         string previousFile = "pr-scores-" + k + ".bin";
         string nextFile     = "pr-scores-" + (k + 1) + ".bin";

         // Every score starts the iteration at the constant term d / n.
         scores.SetAll(x => d / n);

         using (var previous = new BinaryReader(new BufferedStream(new FileStream(previousFile, FileMode.Open, FileAccess.Read))))
         {
             foreach (long u in store.Uids())
             {
                 pending.Add(u);

                 // Keep collecting until the batch is full or the last UID has been seen.
                 if (!pending.Full && !store.IsLastUid(u))
                 {
                     continue;
                 }

                 var outLinks  = store.BatchedGetLinks(pending, Dir.Fwd);
                 var targets   = new UidMap(outLinks);
                 var targetAcc = scores.GetMany(targets);

                 foreach (var row in outLinks)
                 {
                     // One previous score is consumed per source page, in batch order,
                     // and split evenly over that page's forward links.
                     double share = (1.0 - d) * previous.ReadDouble() / row.Length;
                     foreach (var target in row)
                     {
                         targetAcc[targets[target]] += share;
                     }
                 }

                 scores.SetMany(targets, targetAcc);
                 pending.Reset();
             }
         }

         using (var next = new BinaryWriter(new BufferedStream(new FileStream(nextFile, FileMode.Create, FileAccess.Write))))
         {
             foreach (var entry in scores.GetAll())
             {
                 next.Write(entry.val);
             }
         }

         File.Delete(previousFile);
         Console.WriteLine("Iteration {0} complete", k);
     }

     Console.WriteLine("Done. {0} iterations took {1} seconds.", numIters, 0.001 * timer.ElapsedMilliseconds);
 }
예제 #3
0
        /// <summary>
        /// Looks up the value stored in <c>UidMap</c> for <paramref name="uid"/>.
        /// When the key is absent a debugger is launched first; the lookup is then
        /// attempted regardless.
        /// </summary>
        /// <param name="uid">Identifier to translate.</param>
        /// <returns>The value <c>UidMap</c> holds for <paramref name="uid"/>.</returns>
        public int Translate(Guid uid)
        {
            bool known = UidMap.ContainsKey(uid);
            if (!known)
            {
                // Unknown identifier: break into a debugger before the lookup below runs.
                Debugger.Launch();
            }

            return UidMap[uid];
        }
예제 #4
0
        /// <summary>
        /// Resolves every cross-reference in <c>XRef</c> that has no entry in <c>UidMap</c>
        /// against the configured external reference packages and stores the resolved specs
        /// in <c>ExternalXRefSpec</c>. Up to 100 unresolved UIDs are reported in a single
        /// warning. When there are no such cross-references the method returns without
        /// assigning <c>ExternalXRefSpec</c>.
        /// </summary>
        public void SetExternalXRefSpec()
        {
            const int MaxReported = 100;

            // remove internal xref.
            var unresolved = XRef
                             .Where(entry => !UidMap.ContainsKey(entry.Key))
                             .ToDictionary(entry => entry.Key, entry => entry.Value);
            if (unresolved.Count == 0)
            {
                return;
            }

            var specs   = new Dictionary <string, XRefSpec>();
            var missing = new List <KeyValuePair <string, HashSet <string> > >();

            if (ExternalReferencePackages.Length > 0)
            {
                using (var packages = new ExternalReferencePackageCollection(ExternalReferencePackages))
                {
                    foreach (var entry in unresolved)
                    {
                        var spec = GetExternalReference(packages, entry.Key);
                        if (spec != null)
                        {
                            specs[entry.Key] = spec;
                        }
                        else if (missing.Count < MaxReported)
                        {
                            // Only the first MaxReported misses are remembered for the warning.
                            missing.Add(entry);
                        }
                    }
                }
            }
            else
            {
                // No packages to search: everything is missing, report the first MaxReported.
                missing.AddRange(unresolved.Take(MaxReported));
            }

            if (missing.Count > 0)
            {
                var separator = Environment.NewLine + "\t";
                var uidLines  = string.Join(
                    separator,
                    missing.Select(m => "@" + m.Key + " in files \"" + string.Join(",", m.Value.Select(p => p.ToDisplayPath())) + "\""));

                string message = missing.Count < MaxReported
                    ? $"Missing following definitions of cross-reference:{Environment.NewLine}\t{uidLines}"
                    : $"Too many missing definitions of cross-reference, following is top 100:{Environment.NewLine}\t{uidLines}";
                Logger.LogWarning(message);
            }

            ExternalXRefSpec = specs;
        }
    /// <summary>
    /// Relaxes one batch of pages: every neighbour (in direction <paramref name="dir"/>) whose
    /// stored distance is greater than <paramref name="dist"/> gets that distance and inherits
    /// the seed of the page it was reached from. The updated values are written back to
    /// <paramref name="dists"/> and <paramref name="seeds"/>.
    /// </summary>
    /// <param name="shs">Store queried for the links of <paramref name="uids"/>.</param>
    /// <param name="dists">Per-UID distance state.</param>
    /// <param name="seeds">Per-UID seed state.</param>
    /// <param name="uids">Pages whose neighbours are examined.</param>
    /// <param name="dist">Distance assigned to neighbours that currently have a larger one.</param>
    /// <param name="dir">Link direction to follow.</param>
    private static void ProcessBatch(Store shs, UidState <byte> dists, UidState <long> seeds, long[] uids, byte dist, Dir dir)
    {
        var sourceSeeds   = seeds.GetMany(uids);
        var neighbours    = shs.BatchedGetLinks(uids, dir);
        var neighbourMap  = new UidMap(neighbours);
        var neighbourDist = dists.GetMany(neighbourMap);
        var neighbourSeed = seeds.GetMany(neighbourMap);

        for (int src = 0; src < neighbours.Length; src++)
        {
            foreach (var neighbour in neighbours[src])
            {
                int slot = neighbourMap[neighbour];
                if (neighbourDist[slot] <= dist)
                {
                    // Already at least as close; leave its distance and seed alone.
                    continue;
                }

                neighbourDist[slot] = dist;
                neighbourSeed[slot] = sourceSeeds[src];
            }
        }

        dists.SetMany(neighbourMap, neighbourDist);
        seeds.SetMany(neighbourMap, neighbourSeed);
    }
예제 #6
0
    /// <summary>
    /// Returns, for each entry of <paramref name="uids"/>, the representative reached by
    /// following the chain stored in <paramref name="roots"/> until an entry maps to itself.
    /// Entries whose stored representative turned out to be stale are rewritten in
    /// <paramref name="roots"/> so later lookups are shorter.
    /// </summary>
    /// <param name="roots">Per-UID representative state.</param>
    /// <param name="uids">UIDs to resolve.</param>
    /// <returns>Array parallel to <paramref name="uids"/> holding the resolved representatives.</returns>
    private static long[] GetRoots(UidState <long> roots, long[] uids)
    {
        var current     = roots.GetMany(uids);
        var parents     = new UidMap();
        bool anyNonRoot = false;

        // Collect the representatives of every UID that is not its own representative.
        for (int i = 0; i < uids.Length; i++)
        {
            if (current[i] == uids[i])
            {
                continue;
            }

            parents.Add(current[i]);
            anyNonRoot = true;
        }

        if (!anyNonRoot)
        {
            return(current);
        }

        // Resolve those representatives recursively, then adopt the resolved values.
        var resolved = GetRoots(roots, parents);
        bool changed = false;
        for (int i = 0; i < uids.Length; i++)
        {
            if (current[i] == uids[i])
            {
                continue;
            }

            var root = resolved[parents[current[i]]];
            if (current[i] != root)
            {
                current[i] = root;
                changed    = true;
            }
        }

        if (changed)
        {
            roots.SetMany(uids, current);
        }

        return(current);
    }
    /// <summary>
    /// Query driver: reads one query (an id line, a URL-count line, then that many URL lines)
    /// from a text file, expands the URLs into a neighbourhood using sampled backward and
    /// forward links, runs <c>computeHITS</c> over it and prints the two scores and the URL
    /// of every scored node.
    /// </summary>
    /// <param name="args">
    /// <c>args[0]</c> is passed to <c>Service</c>; <c>args[1]</c> is parsed as the store GUID and
    /// is also handed to <c>RevisionData</c>; <c>args[2]</c> is the query file path;
    /// <c>args[3]</c> and <c>args[4]</c> are converted to the dates <c>d1</c> and <c>d2</c>.
    /// The argument count is not validated.
    /// </param>
    public static void Main(string[] args)
    {
        var shs = new Service(args[0]).OpenStore(Guid.Parse(args[1]));

        //using (var rd = new BinaryReader(new BufferedStream(new FileStream(args[2], FileMode.Open, FileAccess.Read)))) {
        // NOTE(review): args[1] is used both as the store GUID above and as the revision file
        // path here — confirm this is intended.
        RevisionData info = new RevisionData(args[1]);

        using (var rd = new StreamReader(new BufferedStream(new FileStream(args[2], FileMode.Open, FileAccess.Read))))
        {
            DateTime d1 = Convert.ToDateTime(args[3]);
            DateTime d2 = Convert.ToDateTime(args[4]);

            try
            {
                // Query file layout: query id, number of URLs, then one URL per line.
                int queryId = Int32.Parse(rd.ReadLine());
                int numUrls = Int32.Parse(rd.ReadLine());
                var urls    = new string[numUrls];
                for (int i = 0; i < numUrls; i++)
                {
                    urls[i] = rd.ReadLine();
                }



                // Timing starts after the query has been read.
                var sw   = Stopwatch.StartNew();
                var uids = shs.BatchedUrlToUid(urls);
                var tbl  = new UidMap(uids, true);


                // NOTE(review): bs and fs are not declared in this method; presumably they are
                // class-level sample sizes for backward/forward links — confirm.
                long[] bwdUids  = tbl;
                var    bwdLinks = shs.BatchedSampleLinks(bwdUids, Dir.Bwd, bs, true);
                SortedDictionary <string, long> temp = new SortedDictionary <string, long>();
                // NOTE(review): this validation pass has no visible effect on the result: the
                // ContainsValue check has an empty body, and temp / bwdValidateUrls are not read
                // afterwards. It still issues service calls for every sampled back-link.
                for (int i = 0; i < bwdUids.Length; i++)
                {
                    var bwdValidateUids = shs.BatchedSampleLinks(bwdLinks[i], Dir.Fwd, fs, true);
                    for (int j = 0; j < bwdValidateUids.Length; j++)
                    {
                        string[] validateUrls = shs.BatchedUidToUrl(bwdValidateUids[j]);
                        temp = info.getOutlinkInDuration(bwdLinks[i][j], bwdValidateUids[j], validateUrls, d1, d2);
                        if (temp.ContainsValue(bwdUids[i]))
                        {
                        }
                    }
                    var bwdValidateUrls = shs.BatchedUidToUrl(bwdLinks[i]);
                    //info.getInlinkInDuration(bwdUids[i], bwdLinks[i], )
                }



                var fwdUids = shs.BatchedSampleLinks(tbl, Dir.Fwd, fs, true);
                var fwdUrls = shs.BatchedUidToUrl(tbl);



                // Grow the table with every sampled backward and forward neighbour.
                foreach (long[] x in bwdLinks)
                {
                    tbl.Add(x);
                }
                foreach (long[] x in fwdUids)
                {
                    tbl.Add(x);
                }
                long[]   srcUids     = tbl;
                string[] return_urls = shs.BatchedUidToUrl(srcUids);



                //Console.Error.WriteLine("Length in Archive {0}", tbl.GetSize());
                //var extTbl = tbl.Subtract(new UidMap(uids, true));
                //Console.Error.WriteLine("Length in Archive {0}", extTbl.GetSize());

                //long one_hope_retrieval_time = sw.ElapsedTicks;
                //Console.WriteLine("Retrieve 1-hops nodes: {0} from {1} root_nodes in {2} microseconds", srcUids.Length, uids.Length, one_hope_retrieval_time / 10);

                //sw = Stopwatch.StartNew();
                // Forward links of every node in the expanded table; these are the edges scored below.
                var dstUids = shs.BatchedGetLinks(srcUids, Dir.Fwd);



                //long forward_link_of_one_hop = sw.ElapsedTicks;

                SortedDictionary <long, KeyValuePair <double, double> > return_score = computeHITS(tbl, srcUids, dstUids);



                //long[] extUids = extTbl;
                //var extUrls = shs.BatchedUidToUrl(extUids);

                // NOTE(review): ticks / 10 is reported as microseconds, which assumes
                // 10 Stopwatch ticks per microsecond — confirm against Stopwatch.Frequency.
                long end_time = sw.ElapsedTicks;
                Console.WriteLine("HITS finish in {0} microseconds with {1} links", end_time / 10, tbl.GetSize());


                /*
                 * int menu = 0;
                 *
                 * while ((menu = Int32.Parse(Console.ReadLine())) > 0)
                 * {
                 *  try {
                 *      Console.WriteLine("You choose {0}.", menu);
                 *      switch (menu)
                 *      {
                 *          case 1:
                 *              Console.Error.WriteLine("Num of URLs: {0}", tbl.GetSize());
                 *              tbl.PrintList();
                 *              break;
                 *          case 2:
                 *               Console.Error.WriteLine("Num of extend URLs: {0}", extTbl.GetSize());
                 *               extTbl.PrintList();
                 *               break;
                 *          case 3:
                 *              for (int i = 0; i < uids.Length; i++)
                 *              {
                 *                  if (uids[i] > -1)
                 *                  {
                 *                      int idx = tbl[uids[i]];
                 *                      Console.WriteLine("{0}\t{1}\t{2}", aut[idx], hub[idx], urls[i]);
                 *                  }
                 *              }
                 *              break;
                 *          case 4:
                 *              Console.Error.WriteLine("Num of extend URLs: {0}", extUids.Length);
                 *              for (int i = 0; i < extUrls.Length; i++)
                 *              {
                 *                  if (extUids[i] > -1)
                 *                  {
                 *                      int idx = tbl[extUids[i]];
                 *                      Console.WriteLine("{0}\t{1}\t{2}", aut[idx], hub[idx], extUrls[i]);
                 *                  }
                 *              }
                 *              break;
                 *          case 5:
                 *              Console.Error.WriteLine("Num of UIDS: {0}", uids.Length);
                 *              for (int i = 0; i < uids.Length; i++)
                 *              {
                 *                  Console.WriteLine("{0}", uids[i]);
                 *              }
                 *              break;
                 *          case 6:
                 *              Console.Error.WriteLine("Mapping UID to URL");
                 *              long uid = Int64.Parse(Console.ReadLine());
                 *              Console.WriteLine("{0}", shs.UidToUrl(uid));
                 *              break;
                 *          case 7:
                 *              Console.Error.WriteLine("Mapping URL to UID");
                 *              string url = Console.ReadLine();
                 *              Console.WriteLine("{0}", shs.UrlToUid(url));
                 *              break;
                 *          default:
                 *              Console.WriteLine("What?");
                 *              break;
                 *      }
                 *  }
                 *  catch (Exception ex)
                 *  {
                 *      Console.Error.WriteLine(ex.ToString());
                 *  }
                 * }
                 *
                 */

                //Output the result scores to screen
                // One line per scored node: first score, second score, URL (tab-separated).
                var result_urls = shs.BatchedUidToUrl(srcUids);
                for (int i = 0; i < srcUids.Length; i++)
                {
                    if (return_score.ContainsKey(srcUids[i]))
                    {
                        KeyValuePair <double, double> score = new KeyValuePair <double, double>();
                        return_score.TryGetValue(srcUids[i], out score);
                        Console.WriteLine("{0}\t{1}\t{2}", score.Key, score.Value, result_urls[i]);
                    }
                }


                //long end_time = sw.ElapsedTicks;

                //Console.WriteLine("SALSA finish in {0} microseconds", end_time / 10);

                //for (int i = 0; i < scores.Length; i++)
                //{
                //    Console.WriteLine("{0}: {1}", urls[i], scores[i]);
                //}
            }
            // NOTE(review): StreamReader.ReadLine returns null at end of input instead of
            // throwing, so this handler looks like a leftover from the commented-out
            // BinaryReader version; a truncated file would surface as a parse error — confirm.
            catch (EndOfStreamException)
            {
            }
        }
    }
    /// <summary>
    /// Loads <c>NUM_RECORDS</c> revision records from a gzip-compressed binary file into
    /// <c>revision_data</c>, then builds <c>nodeMap</c> from the record UIDs and fills
    /// <c>pointer</c> so that a node's map index leads back to its record index.
    /// </summary>
    /// <param name="revision_file">Path of the gzip-compressed revision file to read.</param>
    /// <remarks>
    /// Any exception raised while reading is written to standard error and swallowed;
    /// construction then continues with whatever was read up to that point.
    /// </remarks>
    public RevisionData(string revision_file)
    {
        long[] uuid = null;
        using (var rd = new BinaryReader(new GZipStream(new BufferedStream(new FileStream(revision_file, FileMode.Open, FileAccess.Read)), CompressionMode.Decompress)))
        {
            // Declared outside the try so the catch block can report the failing record index.
            int i = 0;
            try
            {
                revision_data = new node_revision_info[NUM_RECORDS];
                uuid          = new long[NUM_RECORDS];
                for (i = 0; i < NUM_RECORDS; i++)
                {
                    // Record header: Int64 UID, Int32 revision count, Int32 outlink vector size.
                    uuid[i] = (long)rd.ReadInt64();
                    //Console.Write("{0} ", uuid[i]);
                    revision_data[i].number_of_revision = rd.ReadInt32();
                    //Console.Write("{0} ", revision_data[i].number_of_revision);
                    revision_data[i].outlink_vector_size = rd.ReadInt32();
                    //Console.Write("{0} ", revision_data[i].outlink_vector_size);
                    revision_data[i].revision_matrix = new byte[revision_data[i].number_of_revision][];
                    revision_data[i].time_duration   = new long[revision_data[i].number_of_revision];
                    // One bit per outlink, rounded up to whole bytes.
                    int outlink_vector_matrix_size_in_byte = (int)Math.Ceiling((double)(revision_data[i].outlink_vector_size / 8.0));
                    //Console.WriteLine("2");
                    for (int j = 0; j < revision_data[i].number_of_revision; j++)
                    {
                        revision_data[i].revision_matrix[j] = new byte[outlink_vector_matrix_size_in_byte];
                        //Console.WriteLine("3 {0}", j);
                        if (j == 0)
                        {
                            // The first revision carries a length-prefixed timestamp string; it is
                            // parsed and moved back by 3,600,000 ms (one hour).
                            // NOTE(review): the reason for the one-hour shift is not visible here
                            // (time-zone correction?) — confirm.
                            //string dt = rd.ReadString();
                            //Console.Write("{0} ", dt);
                            revision_data[i].first_time_stamp = Convert.ToDateTime(rd.ReadString()).AddMilliseconds(-3600000);
                            //revision_data[i].first_time_stamp = rd.ReadString();
                            revision_data[i].time_duration[j] = 0;
                        }
                        else
                        {
                            // Later revisions store an Int64 duration instead of a timestamp.
                            revision_data[i].time_duration[j] = rd.ReadInt64();
                            //Console.Write("Time{0} ", revision_data[i].time_duration[j]);
                        }

                        // Outlink bit vector of this revision, read byte by byte.
                        for (int k = 0; k < outlink_vector_matrix_size_in_byte; k++)
                        {
                            revision_data[i].revision_matrix[j][k] = rd.ReadByte();
                            //Console.Write("{0} ", revision_data[i].revision_matrix[j][k]);
                        }
                    }
                    //Console.WriteLine();
                }
            }
            catch (Exception e)
            {
                // Best effort: report the record index at which reading stopped and carry on.
                Console.Error.WriteLine("Reading error at line: {0}", i);
                Console.Error.WriteLine(e.Message);
            }
        }
        // NOTE(review): if the read failed early, uuid may be null or only partly filled when
        // it reaches UidMap and the loop below — confirm this is acceptable.
        nodeMap = new UidMap(uuid);

        // Map each node's index in nodeMap back to the position of its record.
        for (int i = 0; i < NUM_RECORDS; i++)
        {
            pointer[nodeMap[uuid[i]]] = i;
        }
    }
    /// <summary>
    /// Runs <c>ITERATION_NUM</c> rounds of HITS-style score propagation over the graph described
    /// by <paramref name="srcUids"/> and <paramref name="dstUids"/> and returns the two
    /// L2-normalised scores of every source UID.
    /// </summary>
    /// <param name="tbl">Maps a UID to its dense index; a result of -1 is treated as "not in the graph".</param>
    /// <param name="srcUids">Source UIDs, parallel to <paramref name="dstUids"/>.</param>
    /// <param name="dstUids">For each source UID, the UIDs it links to.</param>
    /// <returns>
    /// For each non-negative source UID known to <paramref name="tbl"/>, a pair whose key is the
    /// <c>aut</c> score and whose value is the <c>hub</c> score. When the graph has no usable
    /// edges all scores are 0 (previously they were NaN because of a 0/0 normalisation).
    /// </returns>
    public static SortedDictionary <long, KeyValuePair <double, double> > computeHITS(UidMap tbl, long[] srcUids, long[][] dstUids)
    {
        int n = dstUids.Length;

        // srcId[x] lists the indices x links to; dstId[x] lists the indices that link to x.
        var srcId = new List <int> [n];
        var dstId = new List <int> [n];

        for (int i = 0; i < n; i++)
        {
            srcId[i] = new List <int>();
            dstId[i] = new List <int>();
        }
        for (int i = 0; i < n; i++)
        {
            int sid = tbl[srcUids[i]];
            for (int j = 0; j < dstUids[i].Length; j++)
            {
                int did = tbl[dstUids[i][j]];
                // Links that leave the neighbourhood (unknown to tbl) are ignored.
                if (did != -1)
                {
                    srcId[sid].Add(did);
                    dstId[did].Add(sid);
                }
            }
        }

        double initScore = Math.Sqrt(1.0 / n);
        var    aut       = new double[n];
        var    tmp_aut   = new double[n];

        var hub     = new double[n];
        var tmp_hub = new double[n];

        double norm_aut = 0.0, norm_hub = 0.0;

        for (int i = 0; i < n; i++)
        {
            hub[i]     = aut[i] = initScore;
            tmp_aut[i] = tmp_hub[i] = 0.0;
        }

        for (int k = 0; k < ITERATION_NUM; k++)
        {
            norm_aut = norm_hub = 0.0;

            // NOTE(review): aut[u] accumulates the hub scores of the nodes u links to
            // (srcId), and hub[u] the aut scores of the nodes linking to u (dstId); confirm
            // this labelling is the intended one.
            for (int u = 0; u < n; u++)
            {
                foreach (var id in srcId[u])
                {
                    tmp_aut[u] += hub[id];
                }
                norm_aut += Math.Pow(tmp_aut[u], 2.0);
            }
            norm_aut = Math.Sqrt(norm_aut);

            for (int u = 0; u < n; u++)
            {
                foreach (var id in dstId[u])
                {
                    tmp_hub[u] += aut[id];
                }
                norm_hub += Math.Pow(tmp_hub[u], 2.0);
            }
            norm_hub = Math.Sqrt(norm_hub);

            for (int u = 0; u < n; u++)
            {
                // A zero norm means every accumulated value is zero; dividing would give
                // NaN (0/0) and poison all later iterations, so emit 0 instead.
                aut[u]     = norm_aut > 0.0 ? tmp_aut[u] / norm_aut : 0.0;
                hub[u]     = norm_hub > 0.0 ? tmp_hub[u] / norm_hub : 0.0;
                tmp_aut[u] = tmp_hub[u] = 0.0;
            }
        }

        //Prepare output data

        SortedDictionary <long, KeyValuePair <double, double> > result_score = new SortedDictionary <long, KeyValuePair <double, double> >();

        for (int i = 0; i < srcUids.Length; i++)
        {
            // Negative UIDs and UIDs unknown to tbl have no score.
            if (srcUids[i] > -1)
            {
                int idx = tbl[srcUids[i]];
                if (idx > -1)
                {
                    result_score.Add(srcUids[i], new KeyValuePair <double, double>(aut[idx], hub[idx]));
                }
            }
        }
        return(result_score);
    }
예제 #10
0
 /// <summary>
 /// Command-line driver that computes weakly connected components of an SHS store with a
 /// batched union-find over a per-UID "root" state, writes the components to
 /// "wcc-main.bin" / "wcc-index.bin", and prints the component count and largest size.
 /// </summary>
 /// <param name="args">
 /// Exactly two values: leader and store GUID. Any other count prints the usage line to
 /// standard error and does nothing else.
 /// </param>
 public static void Main(string[] args)
 {
     if (args.Length != 2)
     {
         Console.Error.WriteLine("Usage: SHS.WCC <leader> <store>");
     }
     else
     {
         var sw    = Stopwatch.StartNew();
         var store = new Service(args[0]).OpenStore(Guid.Parse(args[1]));
         var roots = store.AllocateUidState <long>();
         // Every UID starts as its own representative.
         roots.SetAll(x => x);
         var batch = new Batch <long>(10000);
         // Pass 1: process pages in batches and merge each page with its forward neighbours.
         foreach (long u in store.Uids())
         {
             batch.Add(u);
             if (batch.Full || store.IsLastUid(u))
             {
                 long[]   uids = batch;
                 long[][] fwds = store.BatchedGetLinks(uids, Dir.Fwd);
                 var      map  = new UidMap(fwds);
                 map.Add(uids);
                 // Replace every page and link target by its current representative.
                 var xRoots = GetRoots(roots, map);
                 for (int i = 0; i < fwds.Length; i++)
                 {
                     uids[i] = xRoots[map[uids[i]]];
                     for (int j = 0; j < fwds[i].Length; j++)
                     {
                         fwds[i][j] = xRoots[map[fwds[i][j]]];
                     }
                 }
                 // Rebuild the map over representatives only and fetch their stored roots.
                 map = new UidMap(fwds);
                 map.Add(uids);
                 long[] reprs = roots.GetMany(map);
                 for (int i = 0; i < fwds.Length; i++)
                 {
                     // Follow reprs until reaching an entry that is its own representative.
                     long A = uids[i];
                     long a = map[A];
                     while (A != reprs[a])
                     {
                         A = reprs[a];
                         a = map[A];
                     }
                     for (int j = 0; j < fwds[i].Length; j++)
                     {
                         long B = fwds[i][j];
                         long b = map[B];
                         while (B != reprs[b])
                         {
                             B = reprs[b];
                             b = map[B];
                         }
                         // Union: the smaller representative wins; when b's side wins,
                         // continue merging the remaining links from b.
                         if (reprs[a] < reprs[b])
                         {
                             reprs[b] = reprs[a];
                         }
                         else
                         {
                             reprs[a] = reprs[b];
                             a        = b;
                         }
                     }
                 }
                 roots.SetMany(map, reprs);
                 batch.Reset();
             }
         }
         // Pass 2: call GetRoots on every UID in larger batches; the return value is ignored,
         // so this pass is run for whatever GetRoots writes back into roots.
         batch = new Batch <long>(400000);
         foreach (long u in store.Uids())
         {
             batch.Add(u);
             if (batch.Full || store.IsLastUid(u))
             {
                 GetRoots(roots, batch);
                 batch.Reset();
             }
         }
         // Sort all (uid, root) pairs on disk.
         // NOTE(review): Comparer, Write and Read are defined outside this method; the index
         // logic below relies on equal roots being adjacent after sorting — confirm.
         using (var sorter = new DiskSorter <UidVal <long> >(new Comparer(), Write, Read, 100000000)) {
             foreach (var uv in roots.GetAll())
             {
                 sorter.Add(uv);
             }
             sorter.Sort();
             using (var wccWr = new BinaryWriter(new BufferedStream(new FileStream("wcc-main.bin", FileMode.Create, FileAccess.Write)))) {
                 using (var idxWr = new BinaryWriter(new BufferedStream(new FileStream("wcc-index.bin", FileMode.Create, FileAccess.Write)))) {
                     // wcc-main.bin: UIDs in sorted order. wcc-index.bin: one (size, start
                     // position) pair of Int64 values per run of equal roots.
                     long last     = 0;
                     long lastRoot = -1;
                     for (long i = 0; i < sorter.Total; i++)
                     {
                         var uv = sorter.Get();
                         wccWr.Write(uv.uid);
                         if (i == 0)
                         {
                             lastRoot = uv.val;
                         }
                         else if (uv.val != lastRoot)
                         {
                             // Root changed: close the run that started at 'last'.
                             idxWr.Write(i - last);
                             idxWr.Write(last);
                             last     = i;
                             lastRoot = uv.val;
                         }
                     }
                     Debug.Assert(sorter.AtEnd());
                     // Close the final run, if there was any data at all.
                     if (sorter.Total > 0)
                     {
                         idxWr.Write(sorter.Total - last);
                         idxWr.Write(last);
                     }
                 }
             }
         }
         // Read the index back and build a histogram: component size -> number of components.
         var dict = new System.Collections.Generic.Dictionary <long, long>();
         using (var rd = new BinaryReader(new BufferedStream(new FileStream("wcc-index.bin", FileMode.Open, FileAccess.Read)))) {
             while (true)
             {
                 try {
                     long size = rd.ReadInt64();
                     long pos  = rd.ReadInt64();
                     if (!dict.ContainsKey(size))
                     {
                         dict[size] = 0;
                     }
                     dict[size]++;
                 } catch (EndOfStreamException) {
                     // End of the index file terminates the read loop.
                     break;
                 }
             }
         }
         // Largest size seen and total number of components.
         long maxSize = 0;
         long numWCCs = 0;
         foreach (var kv in dict)
         {
             if (kv.Key > maxSize)
             {
                 maxSize = kv.Key;
             }
             numWCCs += kv.Value;
         }
         Console.WriteLine("Done. {0} weakly connected components, largest has {1} nodes. Job took {2} seconds.", numWCCs, maxSize, 0.001 * sw.ElapsedMilliseconds);
     }
 }
예제 #11
0
파일: Salsa.cs 프로젝트: pombredanne/SHS
    /// <summary>
    /// Query driver for a SALSA-style ranking: for every query in a binary query file it
    /// builds the neighbourhood graph of the query URLs from sampled backward and forward
    /// links, runs 100 rounds of score propagation, and prints the query id together with
    /// the highest-scoring query URL to standard error.
    /// </summary>
    /// <param name="args">
    /// <c>args[0]</c> leader, <c>args[1]</c> store GUID, <c>args[2]</c> query file path,
    /// <c>args[3]</c> backward sample size, <c>args[4]</c> forward sample size.
    /// </param>
    public static void Main(string[] args)
    {
        var shs = new Service(args[0]).OpenStore(Guid.Parse(args[1]));

        using (var queries = new BinaryReader(new BufferedStream(new FileStream(args[2], FileMode.Open, FileAccess.Read))))
        {
            int bs = int.Parse(args[3]);
            int fs = int.Parse(args[4]);

            bool moreQueries = true;
            while (moreQueries)
            {
                try
                {
                    // Query record: id, URL count, then that many length-prefixed strings.
                    int queryId = queries.ReadInt32();
                    int numUrls = queries.ReadInt32();
                    var urls    = new string[numUrls];
                    for (int i = 0; i < numUrls; i++)
                    {
                        urls[i] = queries.ReadString();
                    }

                    // Root set plus sampled backward and forward neighbours.
                    var uids      = shs.BatchedUrlToUid(urls);
                    var tbl       = new UidMap(uids);
                    var sampledIn = shs.BatchedSampleLinks(tbl, Dir.Bwd, bs, true);
                    var sampledOut = shs.BatchedSampleLinks(tbl, Dir.Fwd, fs, true);
                    foreach (long[] group in sampledIn)
                    {
                        tbl.Add(group);
                    }
                    foreach (long[] group in sampledOut)
                    {
                        tbl.Add(group);
                    }

                    long[] srcUids = tbl;
                    var    dstUids = shs.BatchedGetLinks(srcUids, Dir.Fwd);
                    int    n       = dstUids.Length;

                    // outLinks[x]: nodes x links to; inLinks[x]: nodes linking to x.
                    var outLinks = new List <int> [n];
                    var inLinks  = new List <int> [n];
                    for (int i = 0; i < n; i++)
                    {
                        outLinks[i] = new List <int>();
                        inLinks[i]  = new List <int>();
                    }
                    for (int i = 0; i < n; i++)
                    {
                        int sid = tbl[srcUids[i]];
                        foreach (long target in dstUids[i])
                        {
                            int did = tbl[target];
                            if (did == -1)
                            {
                                // Target lies outside the neighbourhood.
                                continue;
                            }

                            outLinks[sid].Add(did);
                            inLinks[did].Add(sid);
                        }
                    }

                    // Only nodes with at least one in-link start with a non-zero score.
                    int numAuts = 0;
                    foreach (var incoming in inLinks)
                    {
                        if (incoming.Count > 0)
                        {
                            numAuts++;
                        }
                    }
                    double initAut = 1.0 / numAuts;
                    var    aut     = new double[n];
                    var    tmp     = new double[n];
                    for (int i = 0; i < n; i++)
                    {
                        aut[i] = inLinks[i].Count > 0 ? initAut : 0.0;
                    }

                    for (int round = 0; round < 100; round++)
                    {
                        // Step 1: each node splits its score evenly over the nodes linking to it.
                        for (int u = 0; u < n; u++)
                        {
                            double share = aut[u] / inLinks[u].Count;
                            foreach (var id in inLinks[u])
                            {
                                tmp[id] += share;
                            }
                            aut[u] = 0.0;
                        }

                        // Step 2: each node splits what it received evenly over its out-links.
                        for (int u = 0; u < n; u++)
                        {
                            double share = tmp[u] / outLinks[u].Count;
                            foreach (var id in outLinks[u])
                            {
                                aut[id] += share;
                            }
                            tmp[u] = 0.0;
                        }
                    }

                    // Best query URL; URLs the store does not know (uid -1) score 0.
                    double bestScore = double.MinValue;
                    string bestUrl   = null;
                    for (int i = 0; i < urls.Length; i++)
                    {
                        double score = uids[i] == -1 ? 0.0 : aut[tbl[uids[i]]];
                        if (score > bestScore)
                        {
                            bestScore = score;
                            bestUrl   = urls[i];
                        }
                    }
                    System.Console.Error.WriteLine("{0} {1}", queryId, bestUrl);
                }
                catch (EndOfStreamException)
                {
                    // Running out of input ends the query loop.
                    moreQueries = false;
                }
            }
        }
    }
예제 #12
0
    /// <summary>
    /// Fault-tolerant PageRank-style driver: scores live in two server-side UID states that
    /// are swapped after every iteration, and each unit of work is wrapped in a
    /// run / checkpoint / retry-on-<c>ServerFailure</c> loop. The final scores are written to
    /// "pr-scores.bin".
    /// </summary>
    /// <param name="args">
    /// Exactly four values: leader, store GUID, the factor <c>d</c> and the iteration count.
    /// Any other count prints the usage line to standard error and does nothing else.
    /// </param>
    public static void Main(string[] args)
    {
        if (args.Length != 4)
        {
            Console.Error.WriteLine("Usage: SHS.PageRankFT <leader> <store> <d> <iters>");
        }
        else
        {
            var sw    = Stopwatch.StartNew();
            var store = new Service(args[0]).OpenStore(Guid.Parse(args[1]));

            // Runs a block, then checkpoints the store; on ServerFailure the whole block is
            // executed again, so every block passed in must be safe to repeat from its start.
            Action <Action> Checkpointed = delegate(Action checkpointedBlock) {
                while (true)
                {
                    try {
                        checkpointedBlock();
                        store.Checkpoint();
                        break;
                    } catch (ServerFailure) {
                        Console.Error.WriteLine("Restarting from checkpoint");
                        // go again
                    }
                }
            };

            double d        = double.Parse(args[2]);
            int    numIters = int.Parse(args[3]);
            long   n        = store.NumUrls();

            UidState <double> oldScores = null, newScores = null;

            // Allocate both score states; the "old" one starts at the uniform score 1/n.
            Checkpointed(delegate() {
                newScores = store.AllocateUidState <double>();
                oldScores = store.AllocateUidState <double>();
                oldScores.SetAll(uid => 1.0 / n);
            });

            for (int k = 0; k < numIters; k++)
            {
                Checkpointed(delegate() {
                    var uidBatch = new Batch <long>(50000);
                    // Every new score starts at the constant term d / n.
                    newScores.SetAll(x => d / n);
                    foreach (long u in store.Uids())
                    {
                        uidBatch.Add(u);
                        if (uidBatch.Full || store.IsLastUid(u))
                        {
                            var linkBatch = store.BatchedGetLinks(uidBatch, Dir.Fwd);
                            var newMap    = new UidMap(linkBatch);
                            var oldSc     = oldScores.GetMany(uidBatch);
                            var newSc     = newScores.GetMany(newMap);
                            for (int i = 0; i < uidBatch.Count; i++)
                            {
                                // Split the page's previous score evenly over its forward links.
                                var links = linkBatch[i];
                                double f  = (1.0 - d) * oldSc[i] / links.Length;
                                foreach (var link in links)
                                {
                                    newSc[newMap[link]] += f;
                                }
                            }
                            newScores.SetMany(newMap, newSc);
                            uidBatch.Reset();
                        }
                    }
                });
                // After the swap, oldScores holds the scores just computed and newScores is
                // the scratch state that the next iteration overwrites.
                var tmp = newScores; newScores = oldScores; oldScores = tmp;
                Console.WriteLine("Done with iteration {0}", k);
            }
            using (var wr = new BinaryWriter(new BufferedStream(new FileStream("pr-scores.bin", FileMode.Create, FileAccess.Write)))) {
                // Because of the swap above, the most recent scores are in oldScores.
                // Writing newScores (as before) emitted the previous iteration's vector:
                // the uniform 1/n after one iteration, and a never-initialised state for zero.
                foreach (var us in oldScores.GetAll())
                {
                    wr.Write(us.val);
                }
            }
            Console.WriteLine("Done. {0} iterations took {1} seconds.", numIters, 0.001 * sw.ElapsedMilliseconds);
        }
    }
예제 #13
0
    /// <summary>
    /// Scores the result URLs of each query in a text file with a SALSA-style authority
    /// computation (name taken from the log message below) over the sampled link
    /// neighborhood of those URLs. Per query it prints every URL with its score to
    /// standard output and "queryId bestUrl" to standard error.
    /// </summary>
    /// <remarks>
    /// Input format, one value per line: query id, number of URLs, then that many URLs.
    /// Reading stops at end of input; a trailing record that is cut short is dropped.
    /// </remarks>
    /// <param name="args">
    /// At least five values: leader, store GUID, query file path, and the two integers
    /// handed to <c>BatchedSampleLinks</c> for the backward and forward direction
    /// (presumably per-node sample sizes -- confirm against the SHS client).
    /// </param>
    public static void Main(string[] args)
    {
        // Every index from args[0] to args[4] is read below; fail with a message
        // instead of an IndexOutOfRangeException, and keep the exit code non-zero.
        if (args.Length < 5)
        {
            Console.Error.WriteLine("Usage: <leader> <store> <queryFile> <bs> <fs>");
            Environment.ExitCode = 1;
            return;
        }

        var shs = new Service(args[0]).OpenStore(Guid.Parse(args[1]));
        const int IterationCount = 10;

        using (var rd = new StreamReader(new BufferedStream(new FileStream(args[2], FileMode.Open, FileAccess.Read))))
        {
            int bs = int.Parse(args[3]);
            int fs = int.Parse(args[4]);
            while (true)
            {
                try {
                    // StreamReader.ReadLine signals end of input by returning null; it
                    // never throws EndOfStreamException, so EOF must be detected here.
                    string queryLine = rd.ReadLine();
                    if (queryLine == null)
                    {
                        break;
                    }
                    string countLine = rd.ReadLine();
                    if (countLine == null)
                    {
                        break;
                    }
                    int queryId = Int32.Parse(queryLine);
                    int numUrls = Int32.Parse(countLine);
                    var urls    = new string[numUrls];
                    bool truncated = false;
                    for (int i = 0; i < numUrls; i++)
                    {
                        urls[i] = rd.ReadLine();
                        if (urls[i] == null)
                        {
                            truncated = true;
                            break;
                        }
                    }
                    if (truncated)
                    {
                        break;
                    }

                    // Build the neighborhood: the query's URLs plus sampled backward and
                    // forward neighbors, all registered in one UID -> index table.
                    var uids    = shs.BatchedUrlToUid(urls);
                    var tbl     = new UidMap(uids);
                    var bwdUids = shs.BatchedSampleLinks(tbl, Dir.Bwd, bs, true);
                    var fwdUids = shs.BatchedSampleLinks(tbl, Dir.Fwd, fs, true);
                    foreach (long[] x in bwdUids)
                    {
                        tbl.Add(x);
                    }
                    foreach (long[] x in fwdUids)
                    {
                        tbl.Add(x);
                    }
                    long[] srcUids = tbl;

                    var dstUids = shs.BatchedGetLinks(srcUids, Dir.Fwd);
                    int n = dstUids.Length;

                    // srcId[v]: indices v links to; dstId[v]: indices linking to v.
                    var srcId = new List <int> [n];
                    var dstId = new List <int> [n];
                    for (int i = 0; i < n; i++)
                    {
                        srcId[i] = new List <int>();
                        dstId[i] = new List <int>();
                    }

                    // NOTE(review): despite the message text, the timer only covers the
                    // adjacency-list construction below, not the score iterations.
                    var sw = Stopwatch.StartNew();
                    for (int i = 0; i < n; i++)
                    {
                        int sid = tbl[srcUids[i]];
                        for (int j = 0; j < dstUids[i].Length; j++)
                        {
                            int did = tbl[dstUids[i][j]];

                            // Links leaving the neighborhood are ignored (the table
                            // apparently reports -1 for a UID it does not contain).
                            if (did != -1)
                            {
                                srcId[sid].Add(did);
                                dstId[did].Add(sid);
                            }
                        }
                    }

                    // TimeSpan ticks are always 100 ns, so /10 yields microseconds.
                    // Stopwatch.ElapsedTicks is counted in Stopwatch.Frequency units and
                    // is only 100 ns on machines where that frequency is 10 MHz.
                    long end_time = sw.Elapsed.Ticks;
                    Console.WriteLine("SALSA finish in {0} microseconds", end_time / 10);

                    // Authorities are the nodes with at least one in-link inside the
                    // neighborhood; they share the initial mass equally.
                    int numAuts = 0;
                    for (int i = 0; i < n; i++)
                    {
                        if (dstId[i].Count > 0)
                        {
                            numAuts++;
                        }
                    }
                    double initAut = 1.0 / numAuts;
                    var    aut     = new double[n];
                    var    tmp     = new double[n];
                    for (int i = 0; i < n; i++)
                    {
                        aut[i] = dstId[i].Count > 0 ? initAut : 0.0;
                    }
                    for (int k = 0; k < IterationCount; k++)
                    {
                        // Backward step: each authority splits its score over its in-linkers.
                        for (int u = 0; u < n; u++)
                        {
                            foreach (var id in dstId[u])
                            {
                                tmp[id] += (aut[u] / dstId[u].Count);
                            }
                            aut[u] = 0.0;
                        }
                        // Forward step: each in-linker splits what it collected over its out-links.
                        for (int u = 0; u < n; u++)
                        {
                            foreach (var id in srcId[u])
                            {
                                aut[id] += (tmp[u] / srcId[u].Count);
                            }
                            tmp[u] = 0.0;
                        }
                    }

                    // URLs that did not resolve to a UID (-1) score zero.
                    var scores = new double[urls.Length];
                    for (int i = 0; i < scores.Length; i++)
                    {
                        scores[i] = uids[i] == -1 ? 0.0 : aut[tbl[uids[i]]];
                    }

                    for (int i = 0; i < scores.Length; i++)
                    {
                        Console.WriteLine("{0}: {1}", urls[i], scores[i]);
                    }

                    // First URL with the strictly highest score wins; null if there are no URLs.
                    double bestScore = double.MinValue;
                    string bestUrl   = null;
                    for (int i = 0; i < urls.Length; i++)
                    {
                        if (scores[i] > bestScore)
                        {
                            bestScore = scores[i];
                            bestUrl   = urls[i];
                        }
                    }
                    System.Console.Error.WriteLine("{0} {1}", queryId, bestUrl);
                } catch (EndOfStreamException) {
                    // Kept for backward compatibility in case a store call surfaces this
                    // exception; the reader itself no longer relies on it.
                    break;
                }
            }
        }
    }