Пример #1
0
    /// <summary>
    /// Looks up the representative stored in <paramref name="roots"/> for every entry of
    /// <paramref name="uids"/> and follows representative chains (by recursing on the
    /// representatives themselves) until each entry is resolved as far as the recursion reaches.
    /// Shortened chains are written back to <paramref name="roots"/>.
    /// </summary>
    /// <param name="roots">Per-uid state holding the current representative of each uid.</param>
    /// <param name="uids">The uids to resolve.</param>
    /// <returns>
    /// An array parallel to <paramref name="uids"/> holding the resolved representative of each uid.
    /// </returns>
    private static long[] GetRoots(UidState <long> roots, long[] uids)
    {
        var current    = roots.GetMany(uids);
        var pending    = new UidMap();
        var hasNonRoot = false;

        // Gather the representatives of every uid that does not represent itself.
        for (int k = 0; k < uids.Length; k++)
        {
            if (current[k] == uids[k])
            {
                continue;
            }
            pending.Add(current[k]);
            hasNonRoot = true;
        }

        // Every uid is its own representative: nothing to resolve, nothing to write back.
        if (!hasNonRoot)
        {
            return(current);
        }

        // Resolve the gathered representatives first. The result is indexed through the map's
        // indexer (presumably the position of a uid inside the map's array form - confirm
        // against UidMap).
        var resolved = GetRoots(roots, pending);
        var dirty    = false;
        for (int k = 0; k < uids.Length; k++)
        {
            if (current[k] == uids[k])
            {
                continue;
            }
            var root = resolved[pending[current[k]]];
            if (root != current[k])
            {
                current[k] = root;
                dirty      = true;
            }
        }

        // Persist only when at least one chain actually became shorter.
        if (dirty)
        {
            roots.SetMany(uids, current);
        }
        return(current);
    }
    /// <summary>
    /// Reads a single query from the text file named by <c>args[2]</c> (first line: query id,
    /// second line: number of URLs, then one URL per line), extends the URL set with sampled
    /// backward and forward links, scores the resulting set with <c>computeHITS</c> and prints one
    /// tab-separated line (score key, score value, URL) per scored uid.
    /// </summary>
    /// <param name="args">
    /// <c>args[0]</c> is handed to the <c>Service</c> constructor; <c>args[1]</c> is the store GUID
    /// (also passed to <c>RevisionData</c>); <c>args[2]</c> is the query file path; <c>args[3]</c>
    /// and <c>args[4]</c> are the two dates forwarded to <c>getOutlinkInDuration</c>.
    /// </param>
    /// <remarks>
    /// NOTE(review): <c>bs</c> and <c>fs</c> are not declared inside this method; they are presumably
    /// class-level sample sizes - confirm against the enclosing type.
    /// </remarks>
    public static void Main(string[] args)
    {
        var shs = new Service(args[0]).OpenStore(Guid.Parse(args[1]));

        RevisionData info = new RevisionData(args[1]);

        using (var rd = new StreamReader(new BufferedStream(new FileStream(args[2], FileMode.Open, FileAccess.Read))))
        {
            DateTime d1 = Convert.ToDateTime(args[3]);
            DateTime d2 = Convert.ToDateTime(args[4]);

            // StreamReader.ReadLine reports end of input by returning null; it never throws
            // EndOfStreamException. Convert the null into that exception so the handler below
            // really runs for a truncated file instead of null leaking into Int32.Parse or into
            // the URL array.
            Func<string> nextLine = () =>
            {
                string line = rd.ReadLine();
                if (line == null)
                {
                    throw new EndOfStreamException();
                }
                return line;
            };

            try
            {
                // The query id is not used further, but it is still parsed so that a malformed
                // header fails in the same place as before.
                Int32.Parse(nextLine());
                int numUrls = Int32.Parse(nextLine());
                var urls    = new string[numUrls];
                for (int i = 0; i < numUrls; i++)
                {
                    urls[i] = nextLine();
                }

                var sw   = Stopwatch.StartNew();
                var uids = shs.BatchedUrlToUid(urls);
                var tbl  = new UidMap(uids, true);

                long[] bwdUids  = tbl;
                var    bwdLinks = shs.BatchedSampleLinks(bwdUids, Dir.Bwd, bs, true);
                for (int i = 0; i < bwdUids.Length; i++)
                {
                    var bwdValidateUids = shs.BatchedSampleLinks(bwdLinks[i], Dir.Fwd, fs, true);
                    for (int j = 0; j < bwdValidateUids.Length; j++)
                    {
                        string[] validateUrls = shs.BatchedUidToUrl(bwdValidateUids[j]);

                        // NOTE(review): the returned map is not consumed yet. The previous code only
                        // probed it with ContainsValue(bwdUids[i]) inside an empty branch, so the
                        // back-link validation looks unfinished. The call itself is kept.
                        info.getOutlinkInDuration(bwdLinks[i][j], bwdValidateUids[j], validateUrls, d1, d2);
                    }

                    // NOTE(review): result unused; call kept because its side effects are unknown.
                    var bwdValidateUrls = shs.BatchedUidToUrl(bwdLinks[i]);
                }

                var fwdUids = shs.BatchedSampleLinks(tbl, Dir.Fwd, fs, true);

                // NOTE(review): result unused; call kept because its side effects are unknown.
                var fwdUrls = shs.BatchedUidToUrl(tbl);

                // Extend the table with the sampled neighbours, backward links first.
                foreach (long[] x in bwdLinks)
                {
                    tbl.Add(x);
                }
                foreach (long[] x in fwdUids)
                {
                    tbl.Add(x);
                }
                long[]   srcUids     = tbl;
                string[] return_urls = shs.BatchedUidToUrl(srcUids);

                var dstUids = shs.BatchedGetLinks(srcUids, Dir.Fwd);

                SortedDictionary<long, KeyValuePair<double, double>> return_score = computeHITS(tbl, srcUids, dstUids);

                // Stopwatch.ElapsedTicks is measured in Stopwatch.Frequency units, which are not
                // guaranteed to be 100 ns. TimeSpan ticks always are, so dividing those by 10
                // yields microseconds on every machine.
                long elapsedMicroseconds = sw.Elapsed.Ticks / 10;
                Console.WriteLine("HITS finish in {0} microseconds with {1} links", elapsedMicroseconds, tbl.GetSize());

                // Output the result scores to screen. The URLs were already resolved for srcUids
                // above, so they are reused instead of asking the store a second time.
                for (int i = 0; i < srcUids.Length; i++)
                {
                    KeyValuePair<double, double> score;
                    if (return_score.TryGetValue(srcUids[i], out score))
                    {
                        Console.WriteLine("{0}\t{1}\t{2}", score.Key, score.Value, return_urls[i]);
                    }
                }
            }
            catch (EndOfStreamException)
            {
                // The file ended before the announced header/URL lines were all read.
                Console.Error.WriteLine("Unexpected end of input in {0}", args[2]);
            }
        }
    }
Пример #3
0
 /// <summary>
 /// Groups the uids of a store by a shared representative ("root") and reports the result as
 /// weakly connected components. Expects exactly two arguments (leader and store GUID); with any
 /// other argument count it only prints a usage line to standard error.
 /// Writes two files in the working directory: "wcc-main.bin" (uids in sorted order) and
 /// "wcc-index.bin" (one size/start pair per run of equal root values), then prints the number of
 /// components, the largest component size and the elapsed time.
 /// </summary>
 /// <param name="args">args[0] is passed to the Service constructor, args[1] is parsed as the store GUID.</param>
 public static void Main(string[] args)
 {
     if (args.Length != 2)
     {
         Console.Error.WriteLine("Usage: SHS.WCC <leader> <store>");
     }
     else
     {
         var sw    = Stopwatch.StartNew();
         var store = new Service(args[0]).OpenStore(Guid.Parse(args[1]));
         var roots = store.AllocateUidState <long>();
         // Initialise the state with the identity function - presumably so that every uid
         // starts out as its own representative (confirm against UidState.SetAll).
         roots.SetAll(x => x);
         // Pass 1: walk all uids in batches of up to 10000 and merge each uid with its
         // forward-link targets.
         var batch = new Batch <long>(10000);
         foreach (long u in store.Uids())
         {
             batch.Add(u);
             // Process the batch once it is full, or when the last uid has been added.
             if (batch.Full || store.IsLastUid(u))
             {
                 long[]   uids = batch;
                 long[][] fwds = store.BatchedGetLinks(uids, Dir.Fwd);
                 // The map covers every link target plus the batch's own uids.
                 var      map  = new UidMap(fwds);
                 map.Add(uids);
                 var xRoots = GetRoots(roots, map);
                 // Replace every uid and every link target, in place, by the value GetRoots
                 // returned for it.
                 for (int i = 0; i < fwds.Length; i++)
                 {
                     uids[i] = xRoots[map[uids[i]]];
                     for (int j = 0; j < fwds[i].Length; j++)
                     {
                         fwds[i][j] = xRoots[map[fwds[i][j]]];
                     }
                 }
                 // Rebuild the map over the rewritten values and load their current
                 // representatives; the merging below works on this local copy.
                 map = new UidMap(fwds);
                 map.Add(uids);
                 long[] reprs = roots.GetMany(map);
                 for (int i = 0; i < fwds.Length; i++)
                 {
                     // Follow representative links inside the local copy until reaching an
                     // entry that represents itself.
                     long A = uids[i];
                     long a = map[A];
                     while (A != reprs[a])
                     {
                         A = reprs[a];
                         a = map[A];
                     }
                     for (int j = 0; j < fwds[i].Length; j++)
                     {
                         // Same walk for the link target.
                         long B = fwds[i][j];
                         long b = map[B];
                         while (B != reprs[b])
                         {
                             B = reprs[b];
                             b = map[B];
                         }
                         // Merge the two entries: the smaller representative value wins.
                         // When the source side loses (or both are equal), continue from
                         // the target's entry for the remaining links.
                         if (reprs[a] < reprs[b])
                         {
                             reprs[b] = reprs[a];
                         }
                         else
                         {
                             reprs[a] = reprs[b];
                             a        = b;
                         }
                     }
                 }
                 // Write the merged representatives back and start a new batch.
                 roots.SetMany(map, reprs);
                 batch.Reset();
             }
         }
         // Pass 2: walk all uids again in batches of up to 400000. The return value of GetRoots
         // is discarded; the call is made for whatever it writes back into roots.
         batch = new Batch <long>(400000);
         foreach (long u in store.Uids())
         {
             batch.Add(u);
             if (batch.Full || store.IsLastUid(u))
             {
                 GetRoots(roots, batch);
                 batch.Reset();
             }
         }
         // Sort all (uid, value) pairs. Comparer, Write and Read are defined outside this method;
         // the ordering presumably places equal values next to each other - confirm against Comparer.
         using (var sorter = new DiskSorter <UidVal <long> >(new Comparer(), Write, Read, 100000000)) {
             foreach (var uv in roots.GetAll())
             {
                 sorter.Add(uv);
             }
             sorter.Sort();
             using (var wccWr = new BinaryWriter(new BufferedStream(new FileStream("wcc-main.bin", FileMode.Create, FileAccess.Write)))) {
                 using (var idxWr = new BinaryWriter(new BufferedStream(new FileStream("wcc-index.bin", FileMode.Create, FileAccess.Write)))) {
                     // last is the position of the first record of the current run of equal
                     // values; lastRoot is that run's value.
                     long last     = 0;
                     long lastRoot = -1;
                     for (long i = 0; i < sorter.Total; i++)
                     {
                         var uv = sorter.Get();
                         wccWr.Write(uv.uid);
                         if (i == 0)
                         {
                             lastRoot = uv.val;
                         }
                         else if (uv.val != lastRoot)
                         {
                             // The value changed: emit the finished run as (length, start position).
                             idxWr.Write(i - last);
                             idxWr.Write(last);
                             last     = i;
                             lastRoot = uv.val;
                         }
                     }
                     Debug.Assert(sorter.AtEnd());
                     // Emit the final run, which the loop above never closes.
                     if (sorter.Total > 0)
                     {
                         idxWr.Write(sorter.Total - last);
                         idxWr.Write(last);
                     }
                 }
             }
         }
         // Re-read the index file and count how many runs exist for each run length.
         var dict = new System.Collections.Generic.Dictionary <long, long>();
         using (var rd = new BinaryReader(new BufferedStream(new FileStream("wcc-index.bin", FileMode.Open, FileAccess.Read)))) {
             while (true)
             {
                 try {
                     long size = rd.ReadInt64();
                     // pos is not used; it is read only to advance past the second field.
                     long pos  = rd.ReadInt64();
                     if (!dict.ContainsKey(size))
                     {
                         dict[size] = 0;
                     }
                     dict[size]++;
                 } catch (EndOfStreamException) {
                     // End of the index file terminates the loop.
                     break;
                 }
             }
         }
         // Largest run length and total number of runs.
         long maxSize = 0;
         long numWCCs = 0;
         foreach (var kv in dict)
         {
             if (kv.Key > maxSize)
             {
                 maxSize = kv.Key;
             }
             numWCCs += kv.Value;
         }
         Console.WriteLine("Done. {0} weakly connected components, largest has {1} nodes. Job took {2} seconds.", numWCCs, maxSize, 0.001 * sw.ElapsedMilliseconds);
     }
 }
Пример #4
0
    /// <summary>
    /// Processes every query stored in the binary file <c>args[2]</c> (Int32 query id, Int32 URL
    /// count, then that many length-prefixed strings) until the reader reports end of stream.
    /// For each query the URL set is extended with sampled backward and forward links, scores are
    /// propagated for 100 rounds over the links inside that set, and the query id together with the
    /// highest-scoring query URL is written to standard error.
    /// </summary>
    /// <param name="args">
    /// <c>args[0]</c> is handed to the <c>Service</c> constructor; <c>args[1]</c> is the store GUID;
    /// <c>args[2]</c> is the query file path; <c>args[3]</c> and <c>args[4]</c> are the sample sizes
    /// passed to <c>BatchedSampleLinks</c> for backward and forward links respectively.
    /// </param>
    public static void Main(string[] args)
    {
        var shs = new Service(args[0]).OpenStore(Guid.Parse(args[1]));

        using (var input = new BinaryReader(new BufferedStream(new FileStream(args[2], FileMode.Open, FileAccess.Read))))
        {
            int bs = int.Parse(args[3]);
            int fs = int.Parse(args[4]);
            try
            {
                // Runs until a read hits the end of the stream; the handler below ends the loop.
                for (;;)
                {
                    int queryId  = input.ReadInt32();
                    int urlCount = input.ReadInt32();
                    var urls     = new string[urlCount];
                    for (int k = 0; k < urlCount; k++)
                    {
                        urls[k] = input.ReadString();
                    }

                    // Build the uid table: query uids first, then sampled neighbours
                    // (backward links before forward links).
                    var uids       = shs.BatchedUrlToUid(urls);
                    var tbl        = new UidMap(uids);
                    var sampledBwd = shs.BatchedSampleLinks(tbl, Dir.Bwd, bs, true);
                    var sampledFwd = shs.BatchedSampleLinks(tbl, Dir.Fwd, fs, true);
                    foreach (long[] links in sampledBwd)
                    {
                        tbl.Add(links);
                    }
                    foreach (long[] links in sampledFwd)
                    {
                        tbl.Add(links);
                    }
                    long[] srcUids = tbl;
                    var    dstUids = shs.BatchedGetLinks(srcUids, Dir.Fwd);

                    // Adjacency lists over table indices; links leaving the table (index -1)
                    // are dropped.
                    int nodeCount = dstUids.Length;
                    var outLinks  = new List<int>[nodeCount];
                    var inLinks   = new List<int>[nodeCount];
                    for (int v = 0; v < nodeCount; v++)
                    {
                        outLinks[v] = new List<int>();
                        inLinks[v]  = new List<int>();
                    }
                    for (int v = 0; v < nodeCount; v++)
                    {
                        int from = tbl[srcUids[v]];
                        foreach (long target in dstUids[v])
                        {
                            int to = tbl[target];
                            if (to == -1)
                            {
                                continue;
                            }
                            outLinks[from].Add(to);
                            inLinks[to].Add(from);
                        }
                    }

                    // Nodes with at least one incoming link share the initial score equally.
                    int linkedTo = 0;
                    for (int v = 0; v < nodeCount; v++)
                    {
                        if (inLinks[v].Count > 0)
                        {
                            linkedTo++;
                        }
                    }
                    double startScore = 1.0 / linkedTo;
                    var    auth       = new double[nodeCount];
                    var    relay      = new double[nodeCount];
                    for (int v = 0; v < nodeCount; v++)
                    {
                        if (inLinks[v].Count > 0)
                        {
                            auth[v] = startScore;
                        }
                    }

                    for (int round = 0; round < 100; round++)
                    {
                        // Backward half-step: split each node's score evenly over the nodes
                        // linking to it.
                        for (int u = 0; u < nodeCount; u++)
                        {
                            int inDegree = inLinks[u].Count;
                            for (int t = 0; t < inDegree; t++)
                            {
                                relay[inLinks[u][t]] += (auth[u] / inDegree);
                            }
                            auth[u] = 0.0;
                        }
                        // Forward half-step: split the relayed score evenly over the nodes
                        // linked to.
                        for (int u = 0; u < nodeCount; u++)
                        {
                            int outDegree = outLinks[u].Count;
                            for (int t = 0; t < outDegree; t++)
                            {
                                auth[outLinks[u][t]] += (relay[u] / outDegree);
                            }
                            relay[u] = 0.0;
                        }
                    }

                    // Pick the first query URL with the strictly highest score; URLs whose
                    // uid is -1 score 0.0.
                    double bestScore = double.MinValue;
                    string bestUrl   = null;
                    for (int k = 0; k < urls.Length; k++)
                    {
                        double score = 0.0;
                        if (uids[k] != -1)
                        {
                            score = auth[tbl[uids[k]]];
                        }
                        if (score > bestScore)
                        {
                            bestScore = score;
                            bestUrl   = urls[k];
                        }
                    }
                    System.Console.Error.WriteLine("{0} {1}", queryId, bestUrl);
                }
            }
            catch (EndOfStreamException)
            {
                // No more queries in the file.
            }
        }
    }
Пример #5
0
    /// <summary>
    /// Processes every query in the text file <c>args[2]</c> (per query: one line with the query
    /// id, one line with the URL count, then one URL per line) until the file is exhausted.
    /// For each query the URL set is extended with sampled backward and forward links, scores are
    /// propagated for a fixed number of rounds over the links inside that set, every query URL is
    /// printed with its score to standard output, and the query id together with the
    /// highest-scoring URL is written to standard error.
    /// </summary>
    /// <param name="args">
    /// <c>args[0]</c> is handed to the <c>Service</c> constructor; <c>args[1]</c> is the store GUID;
    /// <c>args[2]</c> is the query file path; <c>args[3]</c> and <c>args[4]</c> are the sample sizes
    /// passed to <c>BatchedSampleLinks</c> for backward and forward links respectively.
    /// </param>
    public static void Main(string[] args)
    {
        var shs = new Service(args[0]).OpenStore(Guid.Parse(args[1]));

        // Number of propagation rounds per query.
        const int IterationCount = 10;

        using (var rd = new StreamReader(new BufferedStream(new FileStream(args[2], FileMode.Open, FileAccess.Read))))
        {
            int bs = int.Parse(args[3]);
            int fs = int.Parse(args[4]);

            // StreamReader.ReadLine reports end of input by returning null; it never throws
            // EndOfStreamException. Without this conversion the loop below could not terminate
            // cleanly: after the last query Int32.Parse(null) raised ArgumentNullException.
            Func<string> nextLine = () =>
            {
                string line = rd.ReadLine();
                if (line == null)
                {
                    throw new EndOfStreamException();
                }
                return line;
            };

            while (true)
            {
                try
                {
                    int queryId = Int32.Parse(nextLine());
                    int numUrls = Int32.Parse(nextLine());
                    var urls    = new string[numUrls];
                    for (int i = 0; i < numUrls; i++)
                    {
                        urls[i] = nextLine();
                    }

                    // Build the uid table: query uids first, then sampled neighbours
                    // (backward links before forward links).
                    var uids    = shs.BatchedUrlToUid(urls);
                    var tbl     = new UidMap(uids);
                    var bwdUids = shs.BatchedSampleLinks(tbl, Dir.Bwd, bs, true);
                    var fwdUids = shs.BatchedSampleLinks(tbl, Dir.Fwd, fs, true);
                    foreach (long[] x in bwdUids)
                    {
                        tbl.Add(x);
                    }
                    foreach (long[] x in fwdUids)
                    {
                        tbl.Add(x);
                    }
                    long[] srcUids = tbl;
                    var    dstUids = shs.BatchedGetLinks(srcUids, Dir.Fwd);

                    int n     = dstUids.Length;
                    var srcId = new List<int>[n];
                    var dstId = new List<int>[n];
                    for (int i = 0; i < n; i++)
                    {
                        srcId[i] = new List<int>();
                        dstId[i] = new List<int>();
                    }

                    // Time the adjacency-list construction. Links leaving the table (index -1)
                    // are dropped.
                    var sw = Stopwatch.StartNew();
                    for (int i = 0; i < n; i++)
                    {
                        int sid = tbl[srcUids[i]];
                        for (int j = 0; j < dstUids[i].Length; j++)
                        {
                            int did = tbl[dstUids[i][j]];
                            if (did != -1)
                            {
                                srcId[sid].Add(did);
                                dstId[did].Add(sid);
                            }
                        }
                    }

                    // Stopwatch.ElapsedTicks is measured in Stopwatch.Frequency units, which are
                    // not guaranteed to be 100 ns. TimeSpan ticks always are, so dividing those
                    // by 10 yields microseconds on every machine.
                    // NOTE(review): this is reported before the score propagation below runs,
                    // so the figure covers only the loop above.
                    long elapsedMicroseconds = sw.Elapsed.Ticks / 10;
                    Console.WriteLine("SALSA finish in {0} microseconds", elapsedMicroseconds);

                    // Nodes with at least one incoming link share the initial score equally.
                    int numAuts = 0;
                    for (int i = 0; i < n; i++)
                    {
                        if (dstId[i].Count > 0)
                        {
                            numAuts++;
                        }
                    }
                    double initAut = 1.0 / numAuts;
                    var    aut     = new double[n];
                    var    tmp     = new double[n];
                    for (int i = 0; i < n; i++)
                    {
                        aut[i] = dstId[i].Count > 0 ? initAut : 0.0;
                    }

                    for (int k = 0; k < IterationCount; k++)
                    {
                        // Backward half-step: split each node's score evenly over the nodes
                        // linking to it.
                        for (int u = 0; u < n; u++)
                        {
                            foreach (var id in dstId[u])
                            {
                                tmp[id] += (aut[u] / dstId[u].Count);
                            }
                            aut[u] = 0.0;
                        }
                        // Forward half-step: split the relayed score evenly over the nodes
                        // linked to.
                        for (int u = 0; u < n; u++)
                        {
                            foreach (var id in srcId[u])
                            {
                                aut[id] += (tmp[u] / srcId[u].Count);
                            }
                            tmp[u] = 0.0;
                        }
                    }

                    // URLs whose uid is -1 score 0.0.
                    var scores = new double[urls.Length];
                    for (int i = 0; i < scores.Length; i++)
                    {
                        scores[i] = uids[i] == -1 ? 0.0 : aut[tbl[uids[i]]];
                    }

                    for (int i = 0; i < scores.Length; i++)
                    {
                        Console.WriteLine("{0}: {1}", urls[i], scores[i]);
                    }

                    // Pick the first query URL with the strictly highest score.
                    double bestScore = double.MinValue;
                    string bestUrl   = null;
                    for (int i = 0; i < urls.Length; i++)
                    {
                        if (scores[i] > bestScore)
                        {
                            bestScore = scores[i];
                            bestUrl   = urls[i];
                        }
                    }
                    System.Console.Error.WriteLine("{0} {1}", queryId, bestUrl);
                }
                catch (EndOfStreamException)
                {
                    // No more queries in the file.
                    break;
                }
            }
        }
    }