Beispiel #1
0
        /// <summary>
        /// Loads friend-list data from <paramref name="file"/> into the already-sized table,
        /// fanning the work out over <paramref name="loadthreadcount"/> threads that each run
        /// <c>loadthreadproc</c> against one shared load context.
        /// </summary>
        /// <param name="filebuffersize">Buffer size passed to the SeqFileStreamReader.</param>
        /// <param name="file">File name passed to the SeqFileStreamReader.</param>
        /// <param name="loadthreadcount">Number of loader threads to start; must be 1 or more.</param>
        /// <exception cref="Exception">
        /// Thrown when <paramref name="loadthreadcount"/> is below 1 or when the table is null.
        /// </exception>
        public void Load(int filebuffersize, string file, int loadthreadcount)
        {
            LogLine("---- JaggedFriendsLists.Load ----");

            if (loadthreadcount <= 0)
            {
                throw new Exception("Bad load thread count");
            }

            // This method never allocates the table; it has to exist before loading starts.
            if (table.IsNull)
            {
                throw new Exception("Table is null; must call SetUserTableSize");
            }

            numskippedlines = 0;

            // Nothing registered yet: fall back to a single default user-ID range.
            if (usertotableranges.Count == 0)
            {
                AddUserIDRange(0, 399999999);
            }

            using (MySpace.DataMining.SeqFileStream.SeqFileStreamReader reader = new MySpace.DataMining.SeqFileStream.SeqFileStreamReader(file, System.IO.FileMode.Open, System.IO.FileAccess.Read, System.IO.FileShare.Read, filebuffersize))
            {
                SharedLoadContext shared = new SharedLoadContext();
                shared.streamqueue = reader;

                List<System.Threading.Thread> workers = new List<System.Threading.Thread>(loadthreadcount);
                int started = 0;
                while (started < loadthreadcount)
                {
                    System.Threading.Thread worker = new System.Threading.Thread(new System.Threading.ParameterizedThreadStart(loadthreadproc));
                    worker.Name = "CFO_Load_" + started.ToString();
                    worker.Start(shared);
                    workers.Add(worker);
                    started++;
                }

                // The reader must stay open until every worker has finished pulling from it.
                foreach (System.Threading.Thread running in workers)
                {
                    running.Join();
                }
            }

            if (hittablelimit)
            {
                // Running out of table space is reported, not treated as an error.
                LogLine("table limit reached");
            }

            string rowsloaded = realusercount.ToString();
            LogLine("Load finished: number of rows: " + rowsloaded);
        }
Beispiel #2
0
        /// <summary>
        /// Allocates a fresh <c>usercount</c> x <c>(1 + maxfriends)</c> table and fills it from
        /// <paramref name="file"/> using <paramref name="loadthreadcount"/> threads that each run
        /// <c>loadthreadproc</c> against one shared load context.
        /// </summary>
        /// <param name="filebuffersize">Buffer size passed to the SeqFileStreamReader.</param>
        /// <param name="file">File name passed to the SeqFileStreamReader.</param>
        /// <param name="loadthreadcount">Number of loader threads to start; must be 1 or more.</param>
        /// <exception cref="Exception">Thrown when <paramref name="loadthreadcount"/> is below 1.</exception>
        public void Load(int filebuffersize, string file, int loadthreadcount)
        {
            LogLine("---- RectangularFriendsLists.Load ----");

            if (loadthreadcount < 1)
            {
                throw new Exception("Bad load thread count");
            }

            table           = new TwoDInt32(usercount, 1 + maxfriends);
            numskippedlines = 0;

            // Nothing registered yet: fall back to a single default user-ID range.
            if (0 == usertotableranges.Count)
            {
                AddUserIDRange(0, 400000000 - 1);
            }

            using (MySpace.DataMining.SeqFileStream.SeqFileStreamReader stm = new MySpace.DataMining.SeqFileStream.SeqFileStreamReader(file, System.IO.FileMode.Open, System.IO.FileAccess.Read, System.IO.FileShare.Read, filebuffersize))
            {
                SharedLoadContext slc = new SharedLoadContext();
                slc.streamqueue = stm;
                List <System.Threading.Thread> threads = new List <System.Threading.Thread>(loadthreadcount);
                for (int i = 0; i != loadthreadcount; i++)
                {
                    System.Threading.Thread thd = new System.Threading.Thread(new System.Threading.ParameterizedThreadStart(loadthreadproc));
                    thd.Name = "CFO_Load_" + i.ToString();
                    thd.Start(slc);
                    threads.Add(thd);
                }

                // The reader must stay open until every worker has finished pulling from it.
                for (int i = 0; i != loadthreadcount; i++)
                {
                    threads[i].Join();
                }

                // Publish the final row count only after every worker has stopped. A worker
                // that publishes on its own way out can store a value it read before another
                // worker added more rows, leaving a stale count behind.
                realusercount = slc.nexttableindex;
            }

            if (realusercount >= usercount)
            {
                // Filling the table is reported, not treated as an error.
                LogLine("row limit reached");
            }

            LogLine("Load finished: number of rows: " + realusercount.ToString() + " (max " + usercount.ToString() + ")");
        }
        /// <summary>
        /// Creates the <c>SlaveMemory</c>-backed table, opens it, and fills it from
        /// <paramref name="file"/> using <c>nthreads</c> loader threads. Each thread runs
        /// <c>loadthreadproc</c> with the shared load context and its own thread view.
        /// </summary>
        /// <param name="filebuffersize">Buffer size passed to the SeqFileStreamReader.</param>
        /// <param name="file">File name passed to the SeqFileStreamReader.</param>
        /// <exception cref="Exception">
        /// Rethrows the exception stored in <c>goterr</c>, if any, once all threads have finished.
        /// </exception>
        public void Load(int filebuffersize, string file)
        {
            LogLine("---- RectangularFriendsListsHugeMemory.Load ----");

            // Rows per slave, rounded up so that nslaves blocks cover all usercount rows.
            int blockusercount = usercount / nslaves;
            if (0 != (usercount % nslaves))
            {
                blockusercount++;
            }
            // (1 + MaxFriends) values per row; "<< 2" multiplies by 4 (presumably bytes per Int32).
            long blocksize = ((long)blockusercount * (1 + (long)MaxFriends)) << 2;

            // NOTE(review): the 1 + 8 + 4 per-row and trailing + 1 overheads are dictated by the
            // SlaveMemory packet layout, which is not visible here.
            int packetsize = nbatchedrows * (((1 + MaxFriends) << 2) + 1 + 8 + 4) + 1;
            smtable = new SlaveMemory(objname, "CollaborativeFilteringObjectsSlave.exe", blocksize, packetsize, nthreads, nslaves);
            numskippedlines = 0;

            // Nothing registered yet: fall back to a single default user-ID range.
            if (0 == usertotableranges.Count)
            {
                AddUserIDRange(0, 400000000 - 1);
            }

            using (MySpace.DataMining.SeqFileStream.SeqFileStreamReader stm = new MySpace.DataMining.SeqFileStream.SeqFileStreamReader(file, System.IO.FileMode.Open, System.IO.FileAccess.Read, System.IO.FileShare.Read, filebuffersize))
            {
                SharedLoadContext slc = new SharedLoadContext();
                slc.streamqueue = stm;
                List<System.Threading.Thread> threads = new List<System.Threading.Thread>(nthreads);
                smtable.Open(); // Must happen before ThreadViews is handed to the workers.
                for (int i = 0; i != nthreads; i++)
                {
                    ThreadLoadData tld = new ThreadLoadData();
                    tld.slc = slc;
                    tld.tv = smtable.ThreadViews[i];
                    System.Threading.Thread thd = new System.Threading.Thread(new System.Threading.ParameterizedThreadStart(loadthreadproc));
                    thd.Name = "CFO_Load_" + i.ToString();
                    thd.Start(tld);
                    threads.Add(thd);
                }

                // The reader must stay open until every worker has finished pulling from it.
                for (int i = 0; i != nthreads; i++)
                {
                    threads[i].Join();
                }

                // Publish the final row count only after every worker has stopped. A worker
                // that publishes on its own way out can store a value it read before another
                // worker added more rows, leaving a stale count behind.
                realusercount = slc.nexttableindex;
            }

            // Surface the error recorded by a worker, clearing it so a later Load starts clean.
            if (null != goterr)
            {
                Exception e = goterr;
                goterr = null;
                throw e;
            }

#if DEBUG
            if (smtable._batchspilled)
            {
                LogLine("RectangularFriendsListsHugeMemory.Load: SlaveMemory batching spilled to another slave");
                smtable._batchspilled = false;
            }
#endif

            if (realusercount >= usercount)
            {
                // Filling the table is reported, not treated as an error.
                LogLine("row limit reached");
            }

            LogLine("Load finished: number of rows: " + realusercount.ToString() + " (max " + usercount.ToString() + ")");
        }
Beispiel #4
0
        /// <summary>
        /// Loader thread body. Repeatedly takes the next stream from the shared stream queue,
        /// reads (user ID, friend ID) lines from it, and appends one row per run of identical
        /// user IDs to the shared table until the queue is empty or the table limit is hit.
        /// </summary>
        /// <param name="obj">The <c>SharedLoadContext</c> shared by all loader threads.</param>
        void loadthreadproc(object obj)
        {
            try
            {
                // Release builds wrap the whole body in an unchecked block; DEBUG builds do not.
#if DEBUG
#else
                unchecked
#endif
                {
                    SharedLoadContext slc = (SharedLoadContext)obj;
                    SeqFileStream.SeqFileStreamReader streamqueue = slc.streamqueue;
                    // Per-thread scratch: friends collected for the user currently being read.
                    List <Int32> friendsbuf = new List <int>(256);
                    byte[]       linebuf    = new byte[32];
                    // Assumes trailing friend-IDs are 0.
                    while (!hittablelimit)
                    {
                        // Cheap pre-check before taking another stream (read without the slc lock).
                        if (slc.nexttableoffset + 1 + 1 + 1 + 1 >= table.LongLength) // Need room for header and a couple friends.
                        {
                            hittablelimit = true;
                            break;
                        }
                        long             linnum      = 0;
                        string           curfilename = null;
                        System.IO.Stream stm;
                        // The stream queue is shared by all loader threads; take one stream at a time.
                        lock (streamqueue)
                        {
                            stm = streamqueue.GetNextStream(out curfilename);
                        }
                        // No more streams: this thread is done.
                        if (null == stm)
                        {
                            break;
                        }
                        try
                        {
                            using (System.IO.StreamReader rstm = new System.IO.StreamReader(stm))
                            {
                                linnum = 0;
                                // User ID of the previous accepted line; 0 means "none yet".
                                Int32 previd = 0;
                                for (; ;)
                                {
                                    linnum++;
                                    Int32 uid, fid;
#if DEBUG
                                    // Debug-only random delay, enabled by the _sleep flag.
                                    if (_sleep)
                                    {
                                        System.Threading.Thread.Sleep(DateTime.Now.Millisecond % 256);
                                    }
#endif
                                    if (GetNextLine(rstm, out uid, out fid, linebuf))
                                    {
                                        // Lines with a non-positive ID are counted and skipped.
                                        // NOTE(review): numskippedlines is incremented outside any lock,
                                        // so with several loader threads the count may be inexact.
                                        if (uid <= 0)
                                        {
                                            //throw new Exception("bad line: user ID invalid");
                                            numskippedlines++;
                                            continue;
                                        }
                                        if (fid <= 0)
                                        {
                                            //throw new Exception("bad line: friend ID invalid");
                                            numskippedlines++;
                                            continue;
                                        }
                                    }

                                    // User ID changed: the previous user's friend list is complete.
                                    if (uid != previd)
                                    {
                                        if (0 != previd)
                                        {
                                            // Row allocation and the table write happen under the shared lock.
                                            lock (slc)
                                            {
                                                if (!UserData.DoesUserFit(this.table, slc.nexttableoffset, friendsbuf))
                                                {
                                                    // Table is full: flag it for all threads and leave the line loop.
                                                    hittablelimit = true;
                                                    break;
                                                }
                                                UserData ud = UserData.Create(this.table, slc.nexttableoffset);
                                                SetRowIDForUserID(previd, slc.nexttableoffset);
                                                ud._setuserid(previd);
                                                ud._setfriends(friendsbuf);
                                                // The next row starts where this one ends.
                                                slc.nexttableoffset = ud.GetNextRowID();
                                                realusercount++;
                                            }
                                            friendsbuf.Clear();
                                        }
                                    }
                                    // uid of 0 ends this stream; presumably GetNextLine yields 0 at end
                                    // of input -- TODO confirm against GetNextLine.
                                    if (0 == uid)
                                    {
                                        break;
                                    }
                                    // Friends beyond maxfriends are dropped.
                                    if (friendsbuf.Count < maxfriends)
                                    {
                                        friendsbuf.Add(fid);
                                    }
                                    previd = uid;
                                }
                            }
                        }
                        catch (Exception e)
                        {
                            // A failure inside one stream is logged and the rest of that stream is
                            // skipped; the outer loop then moves on to the next stream.
                            try
                            {
                                LogLine("\r\nLoad error: " + e.ToString() + " file: " + (null == curfilename ? "<null>" : "'" + curfilename + "'") + "; line: " + linnum.ToString() + "; table offset: " + slc.nexttableoffset.ToString() + "; exception: " + e.ToString() + "\r\n(Skipping the rest of this file)\r\n");
                            }
                            catch (Exception e2)
                            {
                                // Building or writing the detailed message failed; log that instead.
                                LogLine("\r\nError error: " + e2.ToString() + "\r\n");
                            }
                        }
                        finally
                        {
                            stm.Close();
                        }
                    }
                }
            }
            catch (Exception e)
            {
                // Anything that escapes the per-stream handling is logged and ends this thread.
                LogLine("\r\nLoad error: loadthreadproc catch-all exception: " + e.ToString() + "\r\n");
            }
        }
        /// <summary>
        /// Allocates a fresh <c>usercount</c> x <c>(1 + maxfriends)</c> table and fills it from
        /// <paramref name="file"/> using <paramref name="loadthreadcount"/> threads that each run
        /// <c>loadthreadproc</c> against one shared load context.
        /// </summary>
        /// <param name="filebuffersize">Buffer size passed to the SeqFileStreamReader.</param>
        /// <param name="file">File name passed to the SeqFileStreamReader.</param>
        /// <param name="loadthreadcount">Number of loader threads to start; must be 1 or more.</param>
        /// <exception cref="Exception">Thrown when <paramref name="loadthreadcount"/> is below 1.</exception>
        public void Load(int filebuffersize, string file, int loadthreadcount)
        {
            LogLine("---- RectangularFriendsLists.Load ----");

            if (loadthreadcount < 1)
            {
                throw new Exception("Bad load thread count");
            }

            table = new TwoDInt32(usercount, 1 + maxfriends);
            numskippedlines = 0;

            // Nothing registered yet: fall back to a single default user-ID range.
            if (0 == usertotableranges.Count)
            {
                AddUserIDRange(0, 400000000 - 1);
            }

            using (MySpace.DataMining.SeqFileStream.SeqFileStreamReader stm = new MySpace.DataMining.SeqFileStream.SeqFileStreamReader(file, System.IO.FileMode.Open, System.IO.FileAccess.Read, System.IO.FileShare.Read, filebuffersize))
            {
                SharedLoadContext slc = new SharedLoadContext();
                slc.streamqueue = stm;
                List<System.Threading.Thread> threads = new List<System.Threading.Thread>(loadthreadcount);
                for (int i = 0; i != loadthreadcount; i++)
                {
                    System.Threading.Thread thd = new System.Threading.Thread(new System.Threading.ParameterizedThreadStart(loadthreadproc));
                    thd.Name = "CFO_Load_" + i.ToString();
                    thd.Start(slc);
                    threads.Add(thd);
                }

                // The reader must stay open until every worker has finished pulling from it.
                for (int i = 0; i != loadthreadcount; i++)
                {
                    threads[i].Join();
                }

                // Publish the final row count only after every worker has stopped. A worker
                // that publishes on its own way out can store a value it read before another
                // worker added more rows, leaving a stale count behind.
                realusercount = slc.nexttableindex;
            }

            if (realusercount >= usercount)
            {
                // Filling the table is reported, not treated as an error.
                LogLine("row limit reached");
            }

            LogLine("Load finished: number of rows: " + realusercount.ToString() + " (max " + usercount.ToString() + ")");
        }
Beispiel #6
0
        /// <summary>
        /// Loader thread body. Repeatedly takes the next stream from the shared stream queue,
        /// reads (user ID, friend ID) lines from it, and writes one table row per run of
        /// identical user IDs until the queue is empty or <c>usercount</c> rows have been written.
        /// On exit it publishes the shared row index as <c>realusercount</c>.
        /// </summary>
        /// <param name="obj">The <c>SharedLoadContext</c> shared by all loader threads.</param>
        void loadthreadproc(object obj)
        {
            try
            {
                // Release builds wrap the whole body in an unchecked block; DEBUG builds do not.
#if DEBUG
#else
                unchecked
#endif
                {
                    SharedLoadContext slc = (SharedLoadContext)obj;
                    SeqFileStream.SeqFileStreamReader streamqueue = slc.streamqueue;
                    // Per-thread scratch: friends collected for the user currently being read.
                    Int32[] friendsbuf = new Int32[maxfriends];
                    byte[]  linebuf    = new byte[32];
                    // Assumes trailing friend-IDs are 0.
                    while (slc.nexttableindex < usercount)
                    {
                        long             linnum      = 0;
                        string           curfilename = null;
                        System.IO.Stream stm;
                        // The stream queue is shared by all loader threads; take one stream at a time.
                        lock (streamqueue)
                        {
                            stm = streamqueue.GetNextStream(out curfilename);
                        }
                        // No more streams: this thread is done.
                        if (null == stm)
                        {
                            break;
                        }
                        try
                        {
                            using (System.IO.StreamReader rstm = new System.IO.StreamReader(stm))
                            {
                                linnum = 0;
                                // Number of valid entries currently held in friendsbuf.
                                int   friendindex = 0;
                                // User ID of the previous accepted line; 0 means "none yet".
                                Int32 previd      = 0;
                                for (; ;)
                                {
                                    linnum++;
                                    Int32 uid, fid;
                                    if (GetNextLine(rstm, out uid, out fid, linebuf))
                                    {
                                        // Lines with a non-positive ID are counted and skipped.
                                        if (uid <= 0)
                                        {
                                            numskippedlines++;
                                            continue;
                                        }
                                        if (fid <= 0)
                                        {
                                            numskippedlines++;
                                            continue;
                                        }
                                    }

                                    // User ID changed: the previous user's friend list is complete.
                                    if (uid != previd)
                                    {
                                        if (0 != previd)
                                        {
                                            int myfriendindex = friendindex;
                                            friendindex = 0;
                                            lock (slc)
                                            {
                                                // The row limit has to be checked while holding the lock.
                                                // Checked outside it, two threads can both see the last
                                                // free row and the second one then writes at row
                                                // usercount, one past the end of the table.
                                                if (slc.nexttableindex >= usercount)
                                                {
                                                    break;
                                                }
                                                SetRowIDForUserID(previd, slc.nexttableindex);
                                                // Column 0 holds the user ID; friends follow from column 1.
                                                table[slc.nexttableindex, 0] = previd;
                                                for (int i = 0; i != myfriendindex; i++)
                                                {
                                                    table[slc.nexttableindex, 1 + i] = friendsbuf[i];
                                                }
                                                slc.nexttableindex++;
                                            }
                                        }
                                    }
                                    // uid of 0 ends this stream; presumably GetNextLine yields 0 at end
                                    // of input -- TODO confirm against GetNextLine.
                                    if (0 == uid)
                                    {
                                        break;
                                    }
                                    // Friends beyond maxfriends are dropped.
                                    if (friendindex < maxfriends)
                                    {
                                        friendsbuf[friendindex] = fid;
                                        friendindex++;
                                    }
                                    previd = uid;
                                }
                            }
                        }
                        catch (Exception e)
                        {
                            // A failure inside one stream is logged and the rest of that stream is
                            // skipped; the outer loop then moves on to the next stream.
                            try
                            {
                                LogLine("\r\nLoad error: " + e.ToString() + " file: " + (null == curfilename ? "<null>" : "'" + curfilename + "'") + "; line: " + linnum.ToString() + "; table index: " + slc.nexttableindex.ToString() + "; exception: " + e.ToString() + "\r\n(Skipping the rest of this file)\r\n");
                            }
                            catch (Exception e2)
                            {
                                // Building or writing the detailed message failed; log that instead.
                                LogLine("\r\nError error: " + e2.ToString() + "\r\n");
                            }
                        }
                        finally
                        {
                            stm.Close();
                        }
                    }

                    realusercount = slc.nexttableindex;
                }
            }
            catch (Exception e)
            {
                // Anything that escapes the per-stream handling is logged and ends this thread.
                LogLine("\r\nLoad error: loadthreadproc catch-all exception: " + e.ToString() + "\r\n");
            }
        }
        /// <summary>
        /// Loads friend-list data from <paramref name="file"/> into the already-sized table,
        /// fanning the work out over <paramref name="loadthreadcount"/> threads that each run
        /// <c>loadthreadproc</c> against one shared load context.
        /// </summary>
        /// <param name="filebuffersize">Buffer size passed to the SeqFileStreamReader.</param>
        /// <param name="file">File name passed to the SeqFileStreamReader.</param>
        /// <param name="loadthreadcount">Number of loader threads to start; must be 1 or more.</param>
        /// <exception cref="Exception">
        /// Thrown when <paramref name="loadthreadcount"/> is below 1 or when the table is null.
        /// </exception>
        public void Load(int filebuffersize, string file, int loadthreadcount)
        {
            LogLine("---- JaggedFriendsLists.Load ----");

            if (loadthreadcount <= 0)
            {
                throw new Exception("Bad load thread count");
            }

            // This method never allocates the table; it has to exist before loading starts.
            if (table.IsNull)
            {
                throw new Exception("Table is null; must call SetUserTableSize");
            }

            numskippedlines = 0;

            // Nothing registered yet: fall back to a single default user-ID range.
            if (usertotableranges.Count == 0)
            {
                AddUserIDRange(0, 399999999);
            }

            using (MySpace.DataMining.SeqFileStream.SeqFileStreamReader reader = new MySpace.DataMining.SeqFileStream.SeqFileStreamReader(file, System.IO.FileMode.Open, System.IO.FileAccess.Read, System.IO.FileShare.Read, filebuffersize))
            {
                SharedLoadContext shared = new SharedLoadContext();
                shared.streamqueue = reader;

                List<System.Threading.Thread> workers = new List<System.Threading.Thread>(loadthreadcount);
                int started = 0;
                while (started < loadthreadcount)
                {
                    System.Threading.Thread worker = new System.Threading.Thread(new System.Threading.ParameterizedThreadStart(loadthreadproc));
                    worker.Name = "CFO_Load_" + started.ToString();
                    worker.Start(shared);
                    workers.Add(worker);
                    started++;
                }

                // The reader must stay open until every worker has finished pulling from it.
                foreach (System.Threading.Thread running in workers)
                {
                    running.Join();
                }
            }

            if (hittablelimit)
            {
                // Running out of table space is reported, not treated as an error.
                LogLine("table limit reached");
            }

            string rowsloaded = realusercount.ToString();
            LogLine("Load finished: number of rows: " + rowsloaded);
        }
        /// <summary>
        /// Creates the <c>SlaveMemory</c>-backed table, opens it, and fills it from
        /// <paramref name="file"/> using <c>nthreads</c> loader threads. Each thread runs
        /// <c>loadthreadproc</c> with the shared load context and its own thread view.
        /// </summary>
        /// <param name="filebuffersize">Buffer size passed to the SeqFileStreamReader.</param>
        /// <param name="file">File name passed to the SeqFileStreamReader.</param>
        /// <exception cref="Exception">
        /// Rethrows the exception stored in <c>goterr</c>, if any, once all threads have finished.
        /// </exception>
        public void Load(int filebuffersize, string file)
        {
            LogLine("---- RectangularFriendsListsHugeMemory.Load ----");

            // Rows per slave, rounded up so that nslaves blocks cover all usercount rows.
            int blockusercount = usercount / nslaves;

            if (0 != (usercount % nslaves))
            {
                blockusercount++;
            }
            // (1 + MaxFriends) values per row; "<< 2" multiplies by 4 (presumably bytes per Int32).
            long blocksize = ((long)blockusercount * (1 + (long)MaxFriends)) << 2;

            // NOTE(review): the 1 + 8 + 4 per-row and trailing + 1 overheads are dictated by the
            // SlaveMemory packet layout, which is not visible here.
            int packetsize = nbatchedrows * (((1 + MaxFriends) << 2) + 1 + 8 + 4) + 1;

            smtable         = new SlaveMemory(objname, "CollaborativeFilteringObjectsSlave.exe", blocksize, packetsize, nthreads, nslaves);
            numskippedlines = 0;

            // Nothing registered yet: fall back to a single default user-ID range.
            if (0 == usertotableranges.Count)
            {
                AddUserIDRange(0, 400000000 - 1);
            }

            using (MySpace.DataMining.SeqFileStream.SeqFileStreamReader stm = new MySpace.DataMining.SeqFileStream.SeqFileStreamReader(file, System.IO.FileMode.Open, System.IO.FileAccess.Read, System.IO.FileShare.Read, filebuffersize))
            {
                SharedLoadContext slc = new SharedLoadContext();
                slc.streamqueue = stm;
                List <System.Threading.Thread> threads = new List <System.Threading.Thread>(nthreads);
                smtable.Open(); // Must happen before ThreadViews is handed to the workers.
                for (int i = 0; i != nthreads; i++)
                {
                    ThreadLoadData tld = new ThreadLoadData();
                    tld.slc = slc;
                    tld.tv  = smtable.ThreadViews[i];
                    System.Threading.Thread thd = new System.Threading.Thread(new System.Threading.ParameterizedThreadStart(loadthreadproc));
                    thd.Name = "CFO_Load_" + i.ToString();
                    thd.Start(tld);
                    threads.Add(thd);
                }

                // The reader must stay open until every worker has finished pulling from it.
                for (int i = 0; i != nthreads; i++)
                {
                    threads[i].Join();
                }

                // Publish the final row count only after every worker has stopped. A worker
                // that publishes on its own way out can store a value it read before another
                // worker added more rows, leaving a stale count behind.
                realusercount = slc.nexttableindex;
            }

            // Surface the error recorded by a worker, clearing it so a later Load starts clean.
            if (null != goterr)
            {
                Exception e = goterr;
                goterr = null;
                throw e;
            }

#if DEBUG
            if (smtable._batchspilled)
            {
                LogLine("RectangularFriendsListsHugeMemory.Load: SlaveMemory batching spilled to another slave");
                smtable._batchspilled = false;
            }
#endif

            if (realusercount >= usercount)
            {
                // Filling the table is reported, not treated as an error.
                LogLine("row limit reached");
            }

            LogLine("Load finished: number of rows: " + realusercount.ToString() + " (max " + usercount.ToString() + ")");
        }
        /// <summary>
        /// Loader thread body. Repeatedly takes the next stream from the shared stream queue,
        /// reads (user ID, friend ID) lines from it, and batches one row per run of identical
        /// user IDs into this thread's <c>ThreadBatchData</c>, sending each full batch through the
        /// thread's <c>SlaveMemory.ThreadView</c>. Stops when the queue is empty or
        /// <c>usercount</c> rows have been assigned, then flushes any partial batch and publishes
        /// the shared row index as <c>realusercount</c>. The first error seen is kept in
        /// <c>goterr</c>.
        /// </summary>
        /// <param name="obj">
        /// A <c>ThreadLoadData</c> carrying the shared load context and this thread's view.
        /// </param>
        void loadthreadproc(object obj)
        {
            try
            {
                // Release builds wrap the whole body in an unchecked block; DEBUG builds do not.
#if DEBUG
#else
                unchecked
#endif
                {
                    ThreadBatchData                   batchdata   = new ThreadBatchData(this, nbatchedrows);
                    int                               curbatch    = 0; // Current row from nbatchedrows in batchdata.
                    ThreadLoadData                    tld         = (ThreadLoadData)obj;
                    SharedLoadContext                 slc         = tld.slc;
                    SlaveMemory.ThreadView            tv          = tld.tv;
                    SeqFileStream.SeqFileStreamReader streamqueue = slc.streamqueue;
                    // Per-thread scratch: friends collected for the user currently being read.
                    Int32[]                           friendsbuf  = new Int32[maxfriends];
                    byte[]                            linebuf     = new byte[32];
#if DEBUG
                    // Debug builds poison the buffer with a sentinel so unset slots stand out.
                    for (int i = 0; i < maxfriends; i++)
                    {
                        friendsbuf[i] = -929292;
                    }
#endif
                    // Assumes trailing friend-IDs are 0.
                    while (slc.nexttableindex < usercount)
                    {
                        long             linnum      = 0;
                        string           curfilename = null;
                        System.IO.Stream stm         = null;
                        try
                        {
                            // The stream queue is shared by all loader threads; take one stream at a time.
                            lock (streamqueue)
                            {
                                stm = streamqueue.GetNextStream(out curfilename);
                            }
                            // No more streams: this thread is done.
                            if (null == stm)
                            {
                                break;
                            }
                            using (System.IO.StreamReader rstm = new System.IO.StreamReader(stm))
                            {
                                linnum = 0;
                                // Number of valid entries currently held in friendsbuf.
                                int   friendindex = 0;
                                // User ID of the previous accepted line; 0 means "none yet".
                                Int32 previd      = 0;
                                for (; ;)
                                {
                                    linnum++;
                                    Int32 uid, fid;
                                    if (GetNextLine(rstm, out uid, out fid, linebuf))
                                    {
                                        // Lines with a non-positive ID are counted and skipped.
                                        if (uid <= 0)
                                        {
                                            numskippedlines++;
                                            continue;
                                        }
                                        if (fid <= 0)
                                        {
                                            numskippedlines++;
                                            continue;
                                        }
                                    }

                                    // User ID changed: the previous user's friend list is complete.
                                    if (uid != previd)
                                    {
                                        if (0 != previd)
                                        {
                                            // Zero the unused tail so the whole row can be handed over.
                                            for (int i = friendindex; i < maxfriends; i++)
                                            {
                                                friendsbuf[i] = 0;
                                            }
                                            friendindex = 0;
                                            lock (slc)
                                            {
                                                // The row limit has to be checked while holding the lock.
                                                // Checked outside it, two threads can both see the last
                                                // free row and the second one then batches a row at index
                                                // usercount, one past the rows the table was sized for.
                                                if (slc.nexttableindex >= usercount)
                                                {
                                                    break;
                                                }
                                                SetRowIDForUserID(previd, slc.nexttableindex);
                                                batchdata.SetRowData(curbatch, previd, friendsbuf);
                                                batchdata.BatchRow(curbatch, slc.nexttableindex);
                                                curbatch++;
                                                // Batch is full: send it and start refilling from slot 0.
                                                if (curbatch >= nbatchedrows)
                                                {
                                                    curbatch = 0;
                                                    batchdata.BatchSet(tv);
                                                }
                                                slc.nexttableindex++;
                                            }
#if DEBUG
                                            for (int i = 0; i < maxfriends; i++)
                                            {
                                                friendsbuf[i] = -929292;
                                            }
#endif
                                        }
                                    }
                                    // uid of 0 ends this stream; presumably GetNextLine yields 0 at end
                                    // of input -- TODO confirm against GetNextLine.
                                    if (0 == uid)
                                    {
                                        break;
                                    }
                                    // Friends beyond maxfriends are dropped.
                                    if (friendindex < maxfriends)
                                    {
                                        friendsbuf[friendindex] = fid;
                                        friendindex++;
                                    }
                                    previd = uid;
                                }
                            }
                        }
                        catch (Exception e)
                        {
                            // Keep only the first error; Load rethrows it after all threads finish.
                            if (null == goterr)
                            {
                                goterr = new Exception("Load error: " + e.ToString() + " file: " + (null == curfilename ? "<null>" : "'" + curfilename + "'") + "; line: " + linnum.ToString() + "; table index: " + slc.nexttableindex.ToString(), e);
                            }
                            try
                            {
                                LogLine("\r\nLoad error: " + e.ToString() + " file: " + (null == curfilename ? "<null>" : "'" + curfilename + "'") + "; line: " + linnum.ToString() + "; table index: " + slc.nexttableindex.ToString() + "; exception: " + e.ToString() + "\r\n(Skipping the rest of this file)\r\n");
                            }
                            catch (Exception e2)
                            {
                                // Building or writing the detailed message failed; log that instead.
                                LogLine("\r\nError error: " + e2.ToString() + "\r\n");
                            }
                        }
                        finally
                        {
                            if (null != stm)
                            {
                                stm.Close();
                            }
                        }
                    }
                    // Send whatever is left in a partially filled batch. A failure here goes
                    // straight to the catch-all below with its original stack trace.
                    if (curbatch > 0)
                    {
                        batchdata.BatchSet(tv);
                    }

                    realusercount = slc.nexttableindex;
                }
            }
            catch (Exception e)
            {
                // Anything that escapes the per-stream handling is recorded, logged, and ends this thread.
                if (null == goterr)
                {
                    goterr = new Exception("Load error: loadthreadproc catch-all exception", e);
                }
                LogLine("\r\nLoad error: loadthreadproc catch-all exception: " + e.ToString() + "\r\n");
            }
        }