// Loads the jagged friends table from 'file'.
//   filebuffersize  - buffer size handed to the SeqFileStreamReader.
//   file            - file argument handed to the SeqFileStreamReader.
//   loadthreadcount - number of loader threads (each runs loadthreadproc); must be >= 1.
// Throws Exception when loadthreadcount is below 1 or when the table has not been
// allocated yet (see the message: SetUserTableSize has to be called first).
public void Load(int filebuffersize, string file, int loadthreadcount)
{
    LogLine("---- JaggedFriendsLists.Load ----");

    if (loadthreadcount < 1)
    {
        throw new Exception("Bad load thread count");
    }

    // This method never allocates the table itself.
    if (table.IsNull)
    {
        throw new Exception("Table is null; must call SetUserTableSize");
    }

    numskippedlines = 0;

    // No user-ID ranges registered so far: register one default range.
    if (usertotableranges.Count == 0)
    {
        AddUserIDRange(0, 400000000 - 1);
    }

    using (MySpace.DataMining.SeqFileStream.SeqFileStreamReader reader =
        new MySpace.DataMining.SeqFileStream.SeqFileStreamReader(
            file,
            System.IO.FileMode.Open,
            System.IO.FileAccess.Read,
            System.IO.FileShare.Read,
            filebuffersize))
    {
        // Every loader thread receives the same context object.
        SharedLoadContext shared = new SharedLoadContext();
        shared.streamqueue = reader;

        List<System.Threading.Thread> workers = new List<System.Threading.Thread>(loadthreadcount);
        for (int n = 0; n < loadthreadcount; n++)
        {
            System.Threading.Thread worker =
                new System.Threading.Thread(new System.Threading.ParameterizedThreadStart(loadthreadproc));
            worker.Name = "CFO_Load_" + n.ToString();
            worker.Start(shared);
            workers.Add(worker);
        }

        // The reader must stay open until all loader threads are done with it.
        foreach (System.Threading.Thread started in workers)
        {
            started.Join();
        }
    }

    if (hittablelimit)
    {
        // Running out of table space is only logged, not raised.
        LogLine("table limit reached");
    }

    LogLine("Load finished: number of rows: " + realusercount.ToString());
}
// Allocates a fresh usercount x (1 + maxfriends) table and fills it from 'file'.
//   filebuffersize  - buffer size handed to the SeqFileStreamReader.
//   file            - file argument handed to the SeqFileStreamReader.
//   loadthreadcount - number of loader threads (each runs loadthreadproc); must be >= 1.
// Throws Exception when loadthreadcount is below 1.
public void Load(int filebuffersize, string file, int loadthreadcount)
{
    LogLine("---- RectangularFriendsLists.Load ----");

    if (loadthreadcount < 1)
    {
        throw new Exception("Bad load thread count");
    }

    // Column 0 of a row holds the user ID, the remaining maxfriends columns the friends
    // (that is how loadthreadproc fills the rows).
    table = new TwoDInt32(usercount, 1 + maxfriends);
    numskippedlines = 0;

    // No user-ID ranges registered so far: register one default range.
    if (usertotableranges.Count == 0)
    {
        AddUserIDRange(0, 400000000 - 1);
    }

    using (MySpace.DataMining.SeqFileStream.SeqFileStreamReader reader =
        new MySpace.DataMining.SeqFileStream.SeqFileStreamReader(
            file,
            System.IO.FileMode.Open,
            System.IO.FileAccess.Read,
            System.IO.FileShare.Read,
            filebuffersize))
    {
        // Every loader thread receives the same context object.
        SharedLoadContext shared = new SharedLoadContext();
        shared.streamqueue = reader;

        List<System.Threading.Thread> workers = new List<System.Threading.Thread>(loadthreadcount);
        for (int n = 0; n < loadthreadcount; n++)
        {
            System.Threading.Thread worker =
                new System.Threading.Thread(new System.Threading.ParameterizedThreadStart(loadthreadproc));
            worker.Name = "CFO_Load_" + n.ToString();
            worker.Start(shared);
            workers.Add(worker);
        }

        // The reader must stay open until all loader threads are done with it.
        foreach (System.Threading.Thread started in workers)
        {
            started.Join();
        }
    }

    if (realusercount >= usercount)
    {
        // A full table is only logged, not raised.
        LogLine("row limit reached");
    }

    LogLine("Load finished: number of rows: " + realusercount.ToString() + " (max " + usercount.ToString() + ")");
}
// Creates the SlaveMemory-backed table for 'usercount' users and fills it from 'file'
// with 'nthreads' loader threads (each runs loadthreadproc with its own thread view).
//   filebuffersize - buffer size handed to the SeqFileStreamReader.
//   file           - file argument handed to the SeqFileStreamReader.
// If a loader thread recorded an error in 'goterr', that exception is thrown after all
// threads have finished, and 'goterr' is reset.
public void Load(int filebuffersize, string file)
{
    LogLine("---- RectangularFriendsListsHugeMemory.Load ----");

    // Users per slave, rounded up so that nslaves blocks cover usercount.
    int blockusercount = usercount / nslaves;
    if ((usercount % nslaves) != 0)
    {
        blockusercount++;
    }

    // A row is (1 + MaxFriends) cells; "<< 2" multiplies by 4 (presumably the byte size
    // of an Int32 cell). The meaning of the extra 1 + 8 + 4 per batched row and the
    // trailing + 1 is not visible here -- NOTE(review): confirm against SlaveMemory.
    long blocksize = ((long)blockusercount * (1 + (long)MaxFriends)) << 2;
    int packetsize = nbatchedrows * (((1 + MaxFriends) << 2) + 1 + 8 + 4) + 1;

    smtable = new SlaveMemory(objname, "CollaborativeFilteringObjectsSlave.exe", blocksize, packetsize, nthreads, nslaves);
    numskippedlines = 0;

    // No user-ID ranges registered so far: register one default range.
    if (usertotableranges.Count == 0)
    {
        AddUserIDRange(0, 400000000 - 1);
    }

    using (MySpace.DataMining.SeqFileStream.SeqFileStreamReader reader =
        new MySpace.DataMining.SeqFileStream.SeqFileStreamReader(
            file,
            System.IO.FileMode.Open,
            System.IO.FileAccess.Read,
            System.IO.FileShare.Read,
            filebuffersize))
    {
        SharedLoadContext shared = new SharedLoadContext();
        shared.streamqueue = reader;

        List<System.Threading.Thread> workers = new List<System.Threading.Thread>(nthreads);

        // Open before the thread views are handed out below.
        smtable.Open();

        for (int n = 0; n != nthreads; n++)
        {
            // Shared context plus the view that belongs to this particular thread.
            ThreadLoadData perthread = new ThreadLoadData();
            perthread.slc = shared;
            perthread.tv = smtable.ThreadViews[n];

            System.Threading.Thread worker =
                new System.Threading.Thread(new System.Threading.ParameterizedThreadStart(loadthreadproc));
            worker.Name = "CFO_Load_" + n.ToString();
            worker.Start(perthread);
            workers.Add(worker);
        }

        // The reader must stay open until all loader threads are done with it.
        foreach (System.Threading.Thread started in workers)
        {
            started.Join();
        }
    }

    // Surface an error recorded by a loader thread on the calling thread.
    if (goterr != null)
    {
        Exception pending = goterr;
        goterr = null;
        throw pending;
    }

#if DEBUG
    if (smtable._batchspilled)
    {
        LogLine("RectangularFriendsListsHugeMemory.Load: SlaveMemory batching spilled to another slave");
        smtable._batchspilled = false;
    }
#endif

    if (realusercount >= usercount)
    {
        // A full table is only logged, not raised.
        LogLine("row limit reached");
    }

    LogLine("Load finished: number of rows: " + realusercount.ToString() + " (max " + usercount.ToString() + ")");
}
// Loader thread body for the jagged table.
//   obj - the SharedLoadContext shared by all loader threads.
// Repeatedly takes the next stream from the shared reader and reads (user ID, friend ID)
// pairs from it. A user's record is written when the user ID changes, so the friends of
// one user are expected on consecutive lines. Lines with a non-positive user or friend
// ID are counted in numskippedlines and skipped. At most maxfriends friends are kept
// per user. Errors are logged, never thrown: an error inside a file skips the rest of
// that file only.
void loadthreadproc(object obj)
{
    try
    {
#if DEBUG
#else
        unchecked
#endif
        {
            SharedLoadContext slc = (SharedLoadContext)obj;
            SeqFileStream.SeqFileStreamReader streamqueue = slc.streamqueue;
            List<Int32> friendsbuf = new List<int>(256);
            byte[] linebuf = new byte[32];

            // Assumes trailing friend-IDs are 0.
            while (!hittablelimit)
            {
                // Need room for header and a couple friends.
                if (slc.nexttableoffset + 1 + 1 + 1 + 1 >= table.LongLength)
                {
                    hittablelimit = true;
                    break;
                }

                long linnum = 0;
                string curfilename = null;
                System.IO.Stream stm;

                // The stream queue is shared between all loader threads.
                lock (streamqueue)
                {
                    stm = streamqueue.GetNextStream(out curfilename);
                }
                if (null == stm)
                {
                    // No more input.
                    break;
                }

                try
                {
                    using (System.IO.StreamReader rstm = new System.IO.StreamReader(stm))
                    {
                        linnum = 0;
                        Int32 previd = 0;

                        // Start every file with an empty buffer. If the previous file was
                        // abandoned because of an exception, friends of its unfinished user
                        // are still in here and must not leak into this file's first user.
                        friendsbuf.Clear();

                        for (; ; )
                        {
                            linnum++;
                            Int32 uid, fid;
#if DEBUG
                            if (_sleep)
                            {
                                System.Threading.Thread.Sleep(DateTime.Now.Millisecond % 256);
                            }
#endif
                            if (GetNextLine(rstm, out uid, out fid, linebuf))
                            {
                                if (uid <= 0)
                                {
                                    // Bad line: user ID invalid.
                                    numskippedlines++;
                                    continue;
                                }
                                if (fid <= 0)
                                {
                                    // Bad line: friend ID invalid.
                                    numskippedlines++;
                                    continue;
                                }
                            }

                            // User ID changed (or input ended): flush the previous user.
                            if (uid != previd)
                            {
                                if (0 != previd)
                                {
                                    lock (slc)
                                    {
                                        if (!UserData.DoesUserFit(this.table, slc.nexttableoffset, friendsbuf))
                                        {
                                            hittablelimit = true;
                                            break; // Leaves the line loop; the lock is released on the way out.
                                        }
                                        UserData ud = UserData.Create(this.table, slc.nexttableoffset);
                                        SetRowIDForUserID(previd, slc.nexttableoffset);
                                        ud._setuserid(previd);
                                        ud._setfriends(friendsbuf);
                                        slc.nexttableoffset = ud.GetNextRowID();
                                        realusercount++;
                                    }
                                    friendsbuf.Clear();
                                }
                            }

                            // A user ID of 0 at this point ends the file.
                            if (0 == uid)
                            {
                                break;
                            }

                            // Friends beyond maxfriends are dropped.
                            if (friendsbuf.Count < maxfriends)
                            {
                                friendsbuf.Add(fid);
                            }
                            previd = uid;
                        }
                    }
                }
                catch (Exception e)
                {
                    try
                    {
                        LogLine("\r\nLoad error: " + e.ToString() + " file: " + (null == curfilename ? "<null>" : "'" + curfilename + "'") + "; line: " + linnum.ToString() + "; table offset: " + slc.nexttableoffset.ToString() + "; exception: " + e.ToString() + "\r\n(Skipping the rest of this file)\r\n");
                    }
                    catch (Exception e2)
                    {
                        LogLine("\r\nError error: " + e2.ToString() + "\r\n");
                    }
                }
                finally
                {
                    stm.Close();
                }
            }
        }
    }
    catch (Exception e)
    {
        LogLine("\r\nLoad error: loadthreadproc catch-all exception: " + e.ToString() + "\r\n");
    }
}
// Allocates a fresh usercount x (1 + maxfriends) table and fills it from 'file'.
//   filebuffersize  - buffer size handed to the SeqFileStreamReader.
//   file            - file argument handed to the SeqFileStreamReader.
//   loadthreadcount - number of loader threads (each runs loadthreadproc); must be >= 1.
// Throws Exception when loadthreadcount is below 1.
public void Load(int filebuffersize, string file, int loadthreadcount)
{
    LogLine("---- RectangularFriendsLists.Load ----");

    if (loadthreadcount < 1)
    {
        throw new Exception("Bad load thread count");
    }

    // Column 0 of a row holds the user ID, the remaining maxfriends columns the friends
    // (that is how loadthreadproc fills the rows).
    table = new TwoDInt32(usercount, 1 + maxfriends);
    numskippedlines = 0;

    // No user-ID ranges registered so far: register one default range.
    if (usertotableranges.Count == 0)
    {
        AddUserIDRange(0, 400000000 - 1);
    }

    using (MySpace.DataMining.SeqFileStream.SeqFileStreamReader reader =
        new MySpace.DataMining.SeqFileStream.SeqFileStreamReader(
            file,
            System.IO.FileMode.Open,
            System.IO.FileAccess.Read,
            System.IO.FileShare.Read,
            filebuffersize))
    {
        // Every loader thread receives the same context object.
        SharedLoadContext shared = new SharedLoadContext();
        shared.streamqueue = reader;

        List<System.Threading.Thread> workers = new List<System.Threading.Thread>(loadthreadcount);
        for (int n = 0; n < loadthreadcount; n++)
        {
            System.Threading.Thread worker =
                new System.Threading.Thread(new System.Threading.ParameterizedThreadStart(loadthreadproc));
            worker.Name = "CFO_Load_" + n.ToString();
            worker.Start(shared);
            workers.Add(worker);
        }

        // The reader must stay open until all loader threads are done with it.
        foreach (System.Threading.Thread started in workers)
        {
            started.Join();
        }
    }

    if (realusercount >= usercount)
    {
        // A full table is only logged, not raised.
        LogLine("row limit reached");
    }

    LogLine("Load finished: number of rows: " + realusercount.ToString() + " (max " + usercount.ToString() + ")");
}
// Loader thread body for the rectangular table.
//   obj - the SharedLoadContext shared by all loader threads.
// Repeatedly takes the next stream from the shared reader and reads (user ID, friend ID)
// pairs from it. A user's row is written when the user ID changes, so the friends of one
// user are expected on consecutive lines. Column 0 of the row receives the user ID,
// columns 1.. the friends (at most maxfriends). Lines with a non-positive user or friend
// ID are counted in numskippedlines and skipped. Errors are logged, never thrown: an
// error inside a file skips the rest of that file only.
void loadthreadproc(object obj)
{
    try
    {
#if DEBUG
#else
        unchecked
#endif
        {
            SharedLoadContext slc = (SharedLoadContext)obj;
            SeqFileStream.SeqFileStreamReader streamqueue = slc.streamqueue;
            Int32[] friendsbuf = new Int32[maxfriends];
            byte[] linebuf = new byte[32];

            // Assumes trailing friend-IDs are 0: only the friends actually read are
            // copied into a row, the remaining cells are left as they are.
            while (slc.nexttableindex < usercount)
            {
                long linnum = 0;
                string curfilename = null;
                System.IO.Stream stm;

                // The stream queue is shared between all loader threads.
                lock (streamqueue)
                {
                    stm = streamqueue.GetNextStream(out curfilename);
                }
                if (null == stm)
                {
                    // No more input.
                    break;
                }

                try
                {
                    using (System.IO.StreamReader rstm = new System.IO.StreamReader(stm))
                    {
                        linnum = 0;
                        int friendindex = 0;
                        Int32 previd = 0;

                        for (; ; )
                        {
                            linnum++;
                            Int32 uid, fid;

                            if (GetNextLine(rstm, out uid, out fid, linebuf))
                            {
                                if (uid <= 0)
                                {
                                    // Bad line: user ID invalid.
                                    numskippedlines++;
                                    continue;
                                }
                                if (fid <= 0)
                                {
                                    // Bad line: friend ID invalid.
                                    numskippedlines++;
                                    continue;
                                }
                            }

                            // User ID changed (or input ended): flush the previous user.
                            if (uid != previd)
                            {
                                if (0 != previd)
                                {
                                    int myfriendindex = friendindex;
                                    friendindex = 0;

                                    lock (slc)
                                    {
                                        // The capacity check has to happen under the lock:
                                        // checked outside, two threads could both see the last
                                        // free row and the second one would register and write
                                        // a row index past the end of the table.
                                        if (slc.nexttableindex >= usercount)
                                        {
                                            break; // Table is full; leaves the line loop.
                                        }

                                        SetRowIDForUserID(previd, slc.nexttableindex);
                                        table[slc.nexttableindex, 0] = previd;
                                        for (int i = 0; i != myfriendindex; i++)
                                        {
                                            table[slc.nexttableindex, 1 + i] = friendsbuf[i];
                                        }
                                        slc.nexttableindex++;
                                    }
                                }
                            }

                            // A user ID of 0 at this point ends the file.
                            if (0 == uid)
                            {
                                break;
                            }

                            // Friends beyond maxfriends are dropped.
                            if (friendindex < maxfriends)
                            {
                                friendsbuf[friendindex] = fid;
                                friendindex++;
                            }
                            previd = uid;
                        }
                    }
                }
                catch (Exception e)
                {
                    try
                    {
                        LogLine("\r\nLoad error: " + e.ToString() + " file: " + (null == curfilename ? "<null>" : "'" + curfilename + "'") + "; line: " + linnum.ToString() + "; table index: " + slc.nexttableindex.ToString() + "; exception: " + e.ToString() + "\r\n(Skipping the rest of this file)\r\n");
                    }
                    catch (Exception e2)
                    {
                        LogLine("\r\nError error: " + e2.ToString() + "\r\n");
                    }
                }
                finally
                {
                    stm.Close();
                }
            }

            // Publish the row count under the same lock that guards nexttableindex, so a
            // thread finishing early cannot overwrite a newer count with a stale one.
            lock (slc)
            {
                realusercount = slc.nexttableindex;
            }
        }
    }
    catch (Exception e)
    {
        LogLine("\r\nLoad error: loadthreadproc catch-all exception: " + e.ToString() + "\r\n");
    }
}
// Loads the jagged friends table from 'file'.
//   filebuffersize  - buffer size handed to the SeqFileStreamReader.
//   file            - file argument handed to the SeqFileStreamReader.
//   loadthreadcount - number of loader threads (each runs loadthreadproc); must be >= 1.
// Throws Exception when loadthreadcount is below 1 or when the table has not been
// allocated yet (see the message: SetUserTableSize has to be called first).
public void Load(int filebuffersize, string file, int loadthreadcount)
{
    LogLine("---- JaggedFriendsLists.Load ----");

    if (loadthreadcount < 1)
    {
        throw new Exception("Bad load thread count");
    }

    // This method never allocates the table itself.
    if (table.IsNull)
    {
        throw new Exception("Table is null; must call SetUserTableSize");
    }

    numskippedlines = 0;

    // No user-ID ranges registered so far: register one default range.
    if (usertotableranges.Count == 0)
    {
        AddUserIDRange(0, 400000000 - 1);
    }

    using (MySpace.DataMining.SeqFileStream.SeqFileStreamReader reader =
        new MySpace.DataMining.SeqFileStream.SeqFileStreamReader(
            file,
            System.IO.FileMode.Open,
            System.IO.FileAccess.Read,
            System.IO.FileShare.Read,
            filebuffersize))
    {
        // Every loader thread receives the same context object.
        SharedLoadContext shared = new SharedLoadContext();
        shared.streamqueue = reader;

        List<System.Threading.Thread> workers = new List<System.Threading.Thread>(loadthreadcount);
        for (int n = 0; n < loadthreadcount; n++)
        {
            System.Threading.Thread worker =
                new System.Threading.Thread(new System.Threading.ParameterizedThreadStart(loadthreadproc));
            worker.Name = "CFO_Load_" + n.ToString();
            worker.Start(shared);
            workers.Add(worker);
        }

        // The reader must stay open until all loader threads are done with it.
        foreach (System.Threading.Thread started in workers)
        {
            started.Join();
        }
    }

    if (hittablelimit)
    {
        // Running out of table space is only logged, not raised.
        LogLine("table limit reached");
    }

    LogLine("Load finished: number of rows: " + realusercount.ToString());
}
// Creates the SlaveMemory-backed table for 'usercount' users and fills it from 'file'
// with 'nthreads' loader threads (each runs loadthreadproc with its own thread view).
//   filebuffersize - buffer size handed to the SeqFileStreamReader.
//   file           - file argument handed to the SeqFileStreamReader.
// If a loader thread recorded an error in 'goterr', that exception is thrown after all
// threads have finished, and 'goterr' is reset.
public void Load(int filebuffersize, string file)
{
    LogLine("---- RectangularFriendsListsHugeMemory.Load ----");

    // Users per slave, rounded up so that nslaves blocks cover usercount.
    int blockusercount = usercount / nslaves;
    if ((usercount % nslaves) != 0)
    {
        blockusercount++;
    }

    // A row is (1 + MaxFriends) cells; "<< 2" multiplies by 4 (presumably the byte size
    // of an Int32 cell). The meaning of the extra 1 + 8 + 4 per batched row and the
    // trailing + 1 is not visible here -- NOTE(review): confirm against SlaveMemory.
    long blocksize = ((long)blockusercount * (1 + (long)MaxFriends)) << 2;
    int packetsize = nbatchedrows * (((1 + MaxFriends) << 2) + 1 + 8 + 4) + 1;

    smtable = new SlaveMemory(objname, "CollaborativeFilteringObjectsSlave.exe", blocksize, packetsize, nthreads, nslaves);
    numskippedlines = 0;

    // No user-ID ranges registered so far: register one default range.
    if (usertotableranges.Count == 0)
    {
        AddUserIDRange(0, 400000000 - 1);
    }

    using (MySpace.DataMining.SeqFileStream.SeqFileStreamReader reader =
        new MySpace.DataMining.SeqFileStream.SeqFileStreamReader(
            file,
            System.IO.FileMode.Open,
            System.IO.FileAccess.Read,
            System.IO.FileShare.Read,
            filebuffersize))
    {
        SharedLoadContext shared = new SharedLoadContext();
        shared.streamqueue = reader;

        List<System.Threading.Thread> workers = new List<System.Threading.Thread>(nthreads);

        // Open before the thread views are handed out below.
        smtable.Open();

        for (int n = 0; n != nthreads; n++)
        {
            // Shared context plus the view that belongs to this particular thread.
            ThreadLoadData perthread = new ThreadLoadData();
            perthread.slc = shared;
            perthread.tv = smtable.ThreadViews[n];

            System.Threading.Thread worker =
                new System.Threading.Thread(new System.Threading.ParameterizedThreadStart(loadthreadproc));
            worker.Name = "CFO_Load_" + n.ToString();
            worker.Start(perthread);
            workers.Add(worker);
        }

        // The reader must stay open until all loader threads are done with it.
        foreach (System.Threading.Thread started in workers)
        {
            started.Join();
        }
    }

    // Surface an error recorded by a loader thread on the calling thread.
    if (goterr != null)
    {
        Exception pending = goterr;
        goterr = null;
        throw pending;
    }

#if DEBUG
    if (smtable._batchspilled)
    {
        LogLine("RectangularFriendsListsHugeMemory.Load: SlaveMemory batching spilled to another slave");
        smtable._batchspilled = false;
    }
#endif

    if (realusercount >= usercount)
    {
        // A full table is only logged, not raised.
        LogLine("row limit reached");
    }

    LogLine("Load finished: number of rows: " + realusercount.ToString() + " (max " + usercount.ToString() + ")");
}
// Loader thread body for the SlaveMemory-backed rectangular table.
//   obj - a ThreadLoadData carrying the shared SharedLoadContext and this thread's
//         SlaveMemory.ThreadView.
// Repeatedly takes the next stream from the shared reader and reads (user ID, friend ID)
// pairs from it. A user's row is queued in this thread's batch when the user ID changes,
// so the friends of one user are expected on consecutive lines. The batch is sent through
// the thread view every nbatchedrows rows and once more at the end. Lines with a
// non-positive user or friend ID are counted in numskippedlines and skipped. Nothing is
// thrown from here: the first error is stored in goterr (and logged) for the caller.
void loadthreadproc(object obj)
{
    try
    {
#if DEBUG
#else
        unchecked
#endif
        {
            ThreadBatchData batchdata = new ThreadBatchData(this, nbatchedrows);
            int curbatch = 0; // Current row from nbatchedrows in batchdata.
            ThreadLoadData tld = (ThreadLoadData)obj;
            SharedLoadContext slc = tld.slc;
            SlaveMemory.ThreadView tv = tld.tv;
            SeqFileStream.SeqFileStreamReader streamqueue = slc.streamqueue;
            Int32[] friendsbuf = new Int32[maxfriends];
            byte[] linebuf = new byte[32];
#if DEBUG
            // Poison value so that unset slots stand out in debug builds.
            for (int i = 0; i < maxfriends; i++)
            {
                friendsbuf[i] = -929292;
            }
#endif
            // Assumes trailing friend-IDs are 0.
            while (slc.nexttableindex < usercount)
            {
                long linnum = 0;
                string curfilename = null;
                System.IO.Stream stm = null;

                try
                {
                    // The stream queue is shared between all loader threads.
                    lock (streamqueue)
                    {
                        stm = streamqueue.GetNextStream(out curfilename);
                    }
                    if (null == stm)
                    {
                        // No more input.
                        break;
                    }

                    using (System.IO.StreamReader rstm = new System.IO.StreamReader(stm))
                    {
                        linnum = 0;
                        int friendindex = 0;
                        Int32 previd = 0;

                        for (; ; )
                        {
                            linnum++;
                            Int32 uid, fid;

                            if (GetNextLine(rstm, out uid, out fid, linebuf))
                            {
                                if (uid <= 0)
                                {
                                    // Bad line: user ID invalid.
                                    numskippedlines++;
                                    continue;
                                }
                                if (fid <= 0)
                                {
                                    // Bad line: friend ID invalid.
                                    numskippedlines++;
                                    continue;
                                }
                            }

                            // User ID changed (or input ended): flush the previous user.
                            if (uid != previd)
                            {
                                if (0 != previd)
                                {
                                    // Zero the unused tail so the whole buffer can be stored as the row.
                                    for (int i = friendindex; i < maxfriends; i++)
                                    {
                                        friendsbuf[i] = 0;
                                    }
                                    friendindex = 0;

                                    lock (slc)
                                    {
                                        // The capacity check has to happen under the lock:
                                        // checked outside, two threads could both see the last
                                        // free row and the second one would register and batch
                                        // a row index equal to usercount.
                                        if (slc.nexttableindex >= usercount)
                                        {
                                            break; // Table is full; leaves the line loop.
                                        }

                                        SetRowIDForUserID(previd, slc.nexttableindex);
                                        batchdata.SetRowData(curbatch, previd, friendsbuf);
                                        batchdata.BatchRow(curbatch, slc.nexttableindex);
                                        curbatch++;
                                        if (curbatch >= nbatchedrows)
                                        {
                                            curbatch = 0;
                                            batchdata.BatchSet(tv);
                                        }
                                        slc.nexttableindex++;
                                    }
#if DEBUG
                                    for (int i = 0; i < maxfriends; i++)
                                    {
                                        friendsbuf[i] = -929292;
                                    }
#endif
                                }
                            }

                            // A user ID of 0 at this point ends the file.
                            if (0 == uid)
                            {
                                break;
                            }

                            // Friends beyond maxfriends are dropped.
                            if (friendindex < maxfriends)
                            {
                                friendsbuf[friendindex] = fid;
                                friendindex++;
                            }
                            previd = uid;
                        }
                    }
                }
                catch (Exception e)
                {
                    // Only the first error is kept for the caller.
                    if (null == goterr)
                    {
                        goterr = new Exception("Load error: " + e.ToString() + " file: " + (null == curfilename ? "<null>" : "'" + curfilename + "'") + "; line: " + linnum.ToString() + "; table index: " + slc.nexttableindex.ToString(), e);
                    }
                    try
                    {
                        LogLine("\r\nLoad error: " + e.ToString() + " file: " + (null == curfilename ? "<null>" : "'" + curfilename + "'") + "; line: " + linnum.ToString() + "; table index: " + slc.nexttableindex.ToString() + "; exception: " + e.ToString() + "\r\n(Skipping the rest of this file)\r\n");
                    }
                    catch (Exception e2)
                    {
                        LogLine("\r\nError error: " + e2.ToString() + "\r\n");
                    }
                }
                finally
                {
                    if (null != stm)
                    {
                        stm.Close();
                    }
                }
            }

            // Send whatever is still queued. A failure here propagates, with its original
            // stack trace, to the catch-all below.
            if (curbatch > 0)
            {
                batchdata.BatchSet(tv);
            }

            // Publish the row count under the same lock that guards nexttableindex, so a
            // thread finishing early cannot overwrite a newer count with a stale one.
            lock (slc)
            {
                realusercount = slc.nexttableindex;
            }
        }
    }
    catch (Exception e)
    {
        if (null == goterr)
        {
            goterr = new Exception("Load error: loadthreadproc catch-all exception", e);
        }
        LogLine("\r\nLoad error: loadthreadproc catch-all exception: " + e.ToString() + "\r\n");
    }
}