// Fetch the next request from an unfiltered (no-cache) trace line.
// Produces a read/write request tagged with this process's pid and the
// instruction count elapsed since the previous request; wb_req is always null
// for unfiltered traces.
private void get_req_cache_unfiltered(ref int cpu_inst_cnt, ref string line, Char[] delim, ref string[] tokens, out Req rd_req, out Req wb_req) {
    Dbg.AssertPrint(tokens.Length == 6, "trace line = " + line);

    // Token 5 encodes the request type: 0 => READ, anything else => WRITE.
    ReqType req_type = int.Parse(tokens[5]) == 0 ? ReqType.READ : ReqType.WRITE;

    // When writebacks are disabled, keep consuming trace lines until a read shows up.
    while (req_type == ReqType.WRITE && Config.proc.wb == false) {
        line = read_trace();
        tokens = line.Split(delim);
        req_type = int.Parse(tokens[5]) == 0 ? ReqType.READ : ReqType.WRITE;
    }

    // Instruction count between requests = delta of the cumulative counts in the trace.
    ulong trace_icount = ulong.Parse(tokens[0]);
    if (cur_inst_count == 0) {
        // First request of the trace: no preceding instructions.
        cpu_inst_cnt = 0;
    } else {
        cpu_inst_cnt = (int)(trace_icount - cur_inst_count);
        Dbg.AssertPrint(cpu_inst_cnt >= 0, "Negative instruction count");
    }
    cur_inst_count = trace_icount;

    // Tag the virtual address with the pid in the upper bits so processes don't collide.
    ulong vaddr = ulong.Parse(tokens[2]) + ((ulong)pid << 48);

    rd_req = RequestPool.Depool();
    rd_req.Set(pid, req_type, vaddr);
    wb_req = null;
}
// Construct a per-process trace reader.
// Resolves trace_fname against Config.TraceDirs, opens it as a gzip stream,
// and initializes copy-trace bookkeeping state.
public Trace(int pid, string trace_fname) {
    this.pid = pid;

    // Resolve the trace file against the configured search directories.
    // Stop at the first hit: without the break, later iterations would probe
    // the already-resolved path re-prefixed with another directory
    // (e.g. "dir2/dir1/fname"), which is wasted work and fragile.
    foreach (string dir in Config.TraceDirs.Split(',', ' ')) {
        if (File.Exists(dir + "/" + trace_fname)) {
            trace_fname = dir + "/" + trace_fname;
            break;
        }
    }

    // Trace file must exist after resolution.
    Dbg.AssertPrint(File.Exists(trace_fname), trace_fname + " does not exist in the given paths.");
    this.trace_fname = trace_fname;
    gzip_reader = new GZipStream(File.OpenRead(trace_fname), CompressionMode.Decompress);
    cur_inst_count = 0;
    buffered_wb_addr = -1;

    // "-rc" in the file name marks a copy (row-clone style) trace.
    copy_trace_file = trace_fname.IndexOf("-rc") != -1;

    copy_to_req_addr_q = new Queue <ulong>(256);
    copy_to_req_ipc_deduction = 0;
}
// Callback function when a memory request is complete. This retires instructions or inserts data back into caches.
public void recv_req(Req req) {
    // Stats: copy-generated requests are excluded from read-latency accounting.
    if (!req.CpyGenReq) {
        Stat.procs[pid].read_req_served.collect();
        Stat.procs[pid].read_avg_latency.collect(req.Latency);
    }

    // A write that was converted into a read (write-allocate) is tracked in
    // read_write_q; drop the tracking entry now that it has been served.
    if (req.RdWr) {
        Dbg.Assert(read_write_q.Contains(req.BlockAddr));
        read_write_q.Remove(req.BlockAddr);
    }

    // Free up the instruction window and the MSHR entry for this block.
    // (The old cw_contains_write placeholder was always false and has been removed.)
    bool contains_write = inst_wnd.set_ready(req.BlockAddr);
    mshr.RemoveAll(x => x == req.BlockAddr);

    Req wb_req = null;
    // Install cachelines and handle dirty block evictions
    if (Config.proc.cache_enabled) {
        cache_handler(req, contains_write);
    } else {
        Dbg.AssertPrint(!contains_write, "Inst window contains write requests.");
        // Writeback based on the cache filtered traces
        wb_req = req.WbReq;
        if (wb_req != null) {
            // Merge with an already-pending writeback to the same block if one exists.
            bool wb_merge = wb_q.Exists(x => x.BlockAddr == wb_req.BlockAddr);
            if (!wb_merge) {
                addWB(wb_req);
            } else {
                RequestPool.Enpool(wb_req);
            }
        }
    }

    // Destroy the request and account for the completed outstanding read.
    RequestPool.Enpool(req);
    out_read_req--;
}
// Drain this process's hit queue of cache c, retiring each hit once its
// modeled hit latency has elapsed. The queue is processed in order, so the
// first entry that is not yet ready stops the drain for this cycle.
// Null upper_c means c is a L1 cache, otherwise L2
public void service_cache_hit_queue(Cache c, Cache upper_c = null) {
    LinkedList <Req> hit_queue = c.get_hit_queue(pid);
    while (hit_queue.Count != 0) {
        Req req = hit_queue.First.Value;
        int hit_pid = req.Pid;
        Dbg.Assert(hit_pid == pid);
        // Only retire once the request's departure timestamp has been reached.
        if ((ulong)req.TsDeparture <= cycles) {
            // Hit in L2 and move L2 $line to L1
            if (upper_c != null) {
                Cache l1c = upper_c;
                Dbg.AssertPrint(!l1c.in_cache(req.BlockAddr), "$line from an L2 hit shouldn't be in L1.");
                // Installing in L1 may evict a dirty line; cache_add returns its address.
                ulong l1c_wb_addr = l1c.cache_add(req.BlockAddr, req.Type, hit_pid);
                // Dirty $line eviction from L1, check L2 first.
                if (l1c_wb_addr != NULL_ADDRESS) {
                    // Miss in L2
                    if (!c.is_cache_hit(l1c_wb_addr, ReqType.WRITE)) {
                        // Another potential wb from L2
                        ulong l2c_wb_addr = c.cache_add(l1c_wb_addr, ReqType.WRITE, hit_pid);
                        if (l2c_wb_addr != NULL_ADDRESS) {
                            // Dirty line fell out of L2 as well: send it to memory.
                            gen_cache_wb_req(l2c_wb_addr);
                        }
                    }
                }
                Stat.procs[pid].l2_cache_hit_avg_latency.collect((int)(cycles - (ulong)req.TsArrival));
            } else {
                Stat.procs[pid].l1_cache_hit_avg_latency.collect((int)(cycles - (ulong)req.TsArrival));
            }
            // Simply hit in L1
            hit_queue.RemoveFirst();
            // Unblock any instruction-window entries waiting on this block and recycle the request.
            inst_wnd.set_ready(req.BlockAddr);
            RequestPool.Enpool(req);
        } else {
            return;
        }
    }
}
// Callback invoked when a COPY memory request completes: collects stats,
// releases the instruction-window slot and MSHR entry, and recycles the request.
public void recv_copy_req(Req req) {
    // Bookkeeping for served copy requests.
    Stat.procs[pid].copy_req_served.collect();
    Stat.procs[pid].copy_avg_latency.collect(req.Latency);

    // Release the instruction-window slot (COPY variant) and the matching MSHR entry.
    bool had_write = inst_wnd.set_ready(req.BlockAddr, true);
    mshr.RemoveAll(x => x == req.BlockAddr);

    Dbg.AssertPrint(!had_write, "Inst window contains write reqeusts. COPY is not supported in cache mode.");
    Dbg.Assert(req.WbReq == null);

    // Return the request object to the pool.
    RequestPool.Enpool(req);
}
// Build the per-core L1/L2 cache arrays. Depending on configuration, L2 is
// either one private slice per core or a single shared slice referenced by
// every core; with llc_shared_cache_only, L1s are empty placeholder caches.
public CacheHierarchy(int numCores) {
    L1List = new Cache[numCores];
    L2List = new Cache[numCores];
    uint blockSize = (uint)Config.proc.block_size;

    // Lazily-built shared L2 slice, reused by every core when shared_l2 is set.
    Cache sharedL2 = null;

    for (int core = 0; core < numCores; core++) {
        // Empty L1 cache if we only modeling one shared LLC!
        if (Config.proc.llc_shared_cache_only) {
            L1List[core] = new Cache();
            Dbg.AssertPrint(Config.proc.shared_l2, "Shared LLC option enabled! So shared_l2 knob needs to be true!");
        } else {
            L1List[core] = new Cache(Config.proc.l1_cache_size, Config.proc.l1_cache_assoc, blockSize, Config.proc.l1_cache_hit_latency, core);
        }

        if (Config.proc.shared_l2) {
            // All cores point at the single shared slice (created on first use, id -1).
            if (sharedL2 == null) {
                sharedL2 = new Cache(Config.proc.l2_cache_size, Config.proc.l2_cache_assoc, blockSize, Config.proc.l2_cache_hit_latency, -1, true);
            }
            L2List[core] = sharedL2;
        } else {
            // One slice of private l2 for each core
            L2List[core] = new Cache(Config.proc.l2_cache_size, Config.proc.l2_cache_assoc, blockSize, Config.proc.l2_cache_hit_latency, core, true);
        }
    }
}
// Initialize this request for (pid, type, paddr): maps the trace address to a
// physical address under the configured page-allocation policy, derives the
// block address and DRAM address, and computes the word offset in the line.
public void Set(int pid, ReqType type, ulong paddr) {
    // state
    Pid = pid;
    Type = type;

    // address: keep the raw trace address, then apply the page-mapping policy.
    TracePaddr = paddr;
    if (Config.mctrl.page_randomize) {
        Paddr = Prand.get_paddr(paddr);
    } else if (Config.mctrl.page_sequence) {
        Paddr = Pseq.get_paddr(paddr);
    } else if (Config.mctrl.page_contiguous) {
        Paddr = Pcontig.get_paddr(paddr);
    } else {
        Paddr = paddr;
    }
    BlockAddr = Paddr >> Config.proc.block_size_bits;
    Addr = MemMap.Translate(Paddr);
    Stat.procs[pid].allocated_physical_pages.collect();
    reset_timing();

    // Word offset within the 64-byte cacheline, in 4-byte words (0..15).
    // Page mapping only changes upper address bits, so the offset must be
    // identical before and after translation.
    ulong pwo = (Paddr & (63)) >> 2;
    Dbg.AssertPrint(pwo == ((paddr & (63)) >> 2), "Word offset should be the same for both virtual and physical addresses.");
    // Fixed message: the check (pwo < 16) and the 64B/4B layout allow 16 words,
    // not 8 as the old message claimed.
    Dbg.AssertPrint(pwo < 16, "There should be only 16 words in a cacheline=" + pwo);
    WordOffset = (int)pwo;
}
// Get a list of potential hot addresses to cache at the end of an epoch
public void end_epoch() {
    int budget = _numHotHitCountersPerChan;
    double decay = Config.mctrl.history_weight;

    _hotAddresses.Clear();

    // Refresh the hot-address candidates at the configured monitoring granularity.
    if (_cMonType == CacheMonType.PerCore) {
        // Split the counter budget evenly across cores.
        for (int core = 0; core < Config.N; core++) {
            sort_update_dict(PerCoreAddrHitCounters[core], budget / Config.N, decay);
        }
    } else if (_cMonType == CacheMonType.PerChan) {
        sort_update_dict(AddrHitCounters, budget, decay);
    } else if (_cMonType == CacheMonType.PerBank) {
        // Split the counter budget evenly across every rank/bank pair.
        for (int rank = 0; rank < _mctrl.Rmax; rank++) {
            for (int bank = 0; bank < _mctrl.Bmax; bank++) {
                sort_update_dict(PerBankAddrHitCounters[rank, bank], budget / (int)_mctrl.Rmax / (int)_mctrl.Bmax, decay);
            }
        }
    } else {
        throw new System.Exception("Unspecified cache monitor type.");
    }

    _numEpochs++;
    Dbg.AssertPrint(_hotAddresses.Count <= budget, "Too many hot addresses");
}
// Fetch the next request from a cache-filtered ("clone") trace line.
// "R" lines yield a read (plus an optional writeback); "C" lines yield either
// a native COPY request or, under COPY.MEMCPY, a stream of read/write requests
// staged in copy_to_req_addr_q. Addresses are hex in the trace and get the pid
// tagged into their upper bits.
private void get_req_clone(ref int cpu_inst_cnt, ref string line, Char[] delim, ref string[] tokens, out Req rd_req, out Req wb_req) {
    string inst_type = tokens[0];
    // Any previously expanded copy must have been fully drained before parsing a new line.
    Dbg.Assert(copy_to_req_addr_q.Count == 0);
    if (inst_type == "R") {
        // tokens: [1]=read addr (hex), [2]=writeback addr (hex, 0 = none), [3]=cpu inst count
        cpu_inst_cnt = int.Parse(tokens[3]);
        ulong rd_addr = ulong.Parse(tokens[1], System.Globalization.NumberStyles.HexNumber);
        //ulong rd_add_dup = rd_addr;
        // Tag the pid into the upper bits to keep per-process address spaces disjoint.
        rd_addr = rd_addr | (((ulong)pid) << 60);
        rd_req = RequestPool.Depool();
        rd_req.Set(pid, ReqType.READ, rd_addr);
        ulong wb_addr = ulong.Parse(tokens[2], System.Globalization.NumberStyles.HexNumber);
        wb_req = null;
        if (wb_addr != 0) {
            wb_addr = wb_addr | (((ulong)pid) << 60);
            if (Config.proc.llc_shared_cache_only) {
                // Shared-LLC mode: defer the writeback; it is held until later.
                buffered_wb_addr = wb_addr;
            } else if (Config.proc.wb) {
                wb_req = RequestPool.Depool();
                wb_req.Set(pid, ReqType.WRITE, wb_addr);
            }
        }
    } else if (inst_type == "C") {
        // Copy instruction: tokens [1]=destination addr (hex), [2]=source addr (hex).
        cpu_inst_cnt = 1;
        ulong dst_addr = ulong.Parse(tokens[1], System.Globalization.NumberStyles.HexNumber);
        ulong src_addr = ulong.Parse(tokens[2], System.Globalization.NumberStyles.HexNumber);
        dst_addr = dst_addr | (((ulong)pid) << 60);
        src_addr = src_addr | (((ulong)pid) << 60);
        // Convert the copy request into a list of RD/WR memory requests
        if (Config.mctrl.copy_method == COPY.MEMCPY) {
            // Simply convert every memcopy to multiple read and write requests
            Dbg.Assert(copy_to_req_addr_q.Count == 0);
            // SRC and DST address: interleave src/dst, one cacheline (64B) per pair.
            for (int i = 0; i < Config.mctrl.copy_gran; i++) {
                copy_to_req_addr_q.Enqueue(src_addr);
                copy_to_req_addr_q.Enqueue(dst_addr);
                // Increment by one cacheline
                src_addr += 64;
                dst_addr += 64;
            }
            cpu_inst_cnt = 1;
            // Issue the first staged address now; the rest drain on later calls.
            ulong rd_addr = copy_to_req_addr_q.Dequeue();
            rd_req = RequestPool.Depool();
            rd_req.Set(pid, ReqType.READ, rd_addr);
            // For the destination addr, we need to mark it as dirty when the data is inserted back into the LLC.
            // dirty_insert alternates so that every other request (the dst of each src/dst pair) is a dirty insert.
            rd_req.DirtyInsert = dirty_insert;
            dirty_insert = !dirty_insert;
            rd_req.CpyGenReq = Config.proc.stats_exclude_cpy;
            wb_req = null;
            Stat.banks[rd_req.Addr.cid, rd_req.Addr.rid, rd_req.Addr.bid].cmd_base_inter_sa.collect();
            // Each copy pair retires as 2 extra instructions; deduct them from IPC later.
            copy_to_req_ipc_deduction += 2;
        } else {
            // Native copy support: a single COPY request carrying the source address.
            rd_req = RequestPool.Depool();
            rd_req.Set(pid, ReqType.COPY, src_addr);
            wb_req = null;
        }
    } else {
        rd_req = null;
        wb_req = null;
        // Unreachable for valid traces: inst_type is neither "R" nor "C" here.
        Dbg.AssertPrint(inst_type == "C" || inst_type == "R", "Unable to fetch valid instruction.");
    }
}
// Advance this processor model by one cycle: service cache hits, retire
// instructions, issue pending writebacks, reissue stalled reads, and issue new
// instructions. Several conditions early-return, modeling a stalled cycle.
public void tick() {
    /*** Preamble ***/
    cycles++;
    Stat.procs[pid].cycle.collect();
#if DEBUG
    // Periodic progress report in debug builds only.
    if (cycles % 1000000 == 0) {
        Console.WriteLine("Cycles {0} IPC {1}", cycles, (double)(Stat.procs[pid].ipc.Count) / cycles);
    }
#endif
    //starved for way too long: something's wrong
    if (consec_stalled > 1000000) {
        string str = "Cycles=" + cycles + " -- Inst Window stalled for too long: window head=" + inst_wnd.head();
        Dbg.AssertPrint(false, str);
    }
    // STATS
    ulong inst_cnt = Stat.procs[pid].ipc.Count;
    retired_inst_stats(inst_cnt);
    // Check cache hits: drain L1 first, then L2 (L2 hits install into L1).
    if (Config.proc.cache_enabled) {
        service_cache_hit_queue(l1c);
        service_cache_hit_queue(l2c, l1c);
    }
    /*** Throttle ***/
    // Probabilistically skip the rest of this cycle to throttle this core.
    if (throttle_fraction > 0) {
        if (rand.NextDouble() < throttle_fraction) {
            return;
        }
    }
    /*** Retire ***/
    int retired = inst_wnd.retire(Config.proc.ipc);
    // Deduct those instructions due to expanded copy requests
    if (Config.mctrl.copy_method == COPY.MEMCPY) {
        if (trace.copy_to_req_ipc_deduction > 0 && retired > 0) {
            if (trace.copy_to_req_ipc_deduction > (ulong)retired) {
                trace.copy_to_req_ipc_deduction -= (ulong)retired;
                retired = 0;
            } else {
                retired -= (int)trace.copy_to_req_ipc_deduction;
                trace.copy_to_req_ipc_deduction = 0;
            }
        }
    }
    Stat.procs[pid].ipc.collect(retired);
    Stat.caches[0].total_system_inst_executed.collect(retired);
    inst_count++;
    // Track consecutive zero-retire cycles for the starvation check above.
    if (retired > 0) {
        consec_stalled = 0;
    } else {
        consec_stalled++;
    }
    /*** Issue writeback request ***/
    if (Config.proc.wb && wb_q.Count > 0) {
        bool wb_ok = issue_wb_req(wb_q[0]);
        if (wb_ok) {
            wb_q.RemoveAt(0);
        }
        //writeback stall
        bool stalled_wb = wb_q.Count > Config.proc.wb_q_max;
        if (stalled_wb) {
            return;
        }
    }
    /*** Reissue previous read request ***/
    bool issued_rd_req = false;
    if (mshr_retry || mctrl_retry) {
        Dbg.Assert(curr_rd_req != null && curr_cpu_inst_cnt == 0);
        //mshr/mctrl stall
        bool reissue_ok = reissue_rd_req();
        if (!reissue_ok) {
            return;
        }
        //reissue success
        Dbg.Assert(!mshr_retry && !mctrl_retry);
        issued_rd_req = true;
        curr_rd_req = get_req();
    }
    /*** Issue instructions ***/
    Dbg.Assert(curr_rd_req != null);
    issue_insts(issued_rd_req);
}
// Issue up to Config.proc.ipc instructions this cycle. issued_rd_req indicates
// whether a memory read was already issued earlier in the cycle (at most one
// memory instruction issues per cycle). CPU instructions drain from
// curr_cpu_inst_cnt; memory instructions probe the caches (when enabled)
// before going to the MSHR and memory controller.
public void issue_insts(bool issued_rd_req) {
    //issue instructions
    for (int i = 0; i < Config.proc.ipc; i++) {
        Dbg.Assert(curr_rd_req != null);
        if (curr_rd_req == null) {
            return;
        }
        // Stats
        if (inst_wnd.is_full()) {
            // Only count the stall once per cycle (on the first issue slot).
            if (i == 0) {
                Stat.procs[pid].stall_inst_wnd.collect();
                consec_stalled++;
            }
            return;
        }
        //cpu instructions
        if (curr_cpu_inst_cnt > 0) {
            curr_cpu_inst_cnt--;
            inst_wnd.add(0, false, true, 0); // word oblivious
            continue;
        }
        //only one memory instruction can be issued per cycle
        if (issued_rd_req) {
            return;
        }
        // Ideal memory: requests complete instantly, nothing enters the window.
        if (Config.proc.ideal_memory) {
            Dbg.AssertPrint(!Config.proc.cache_enabled, "Cache is not supported in ideal memory mode.");
            if (curr_rd_req.WbReq != null) {
                RequestPool.Enpool(curr_rd_req.WbReq);
            }
            RequestPool.Enpool(curr_rd_req);
            curr_rd_req = get_req();
            return;
        }
        // Need to mark if an instruction is a write on cache mode or COPY for a copy instruction
        inst_wnd.add(curr_rd_req.BlockAddr, true, false, curr_rd_req.WordOffset, (curr_rd_req.Type == ReqType.WRITE) && Config.proc.cache_enabled, curr_rd_req.Type == ReqType.COPY);
        // check if true miss --
        bool false_miss = inst_wnd.is_duplicate(curr_rd_req.BlockAddr);
        // COPY is a special instruction, so we don't care about if its address is a duplicate of other instructions
        if (false_miss && Config.proc.issue_on_dup_req && curr_rd_req.Type != ReqType.COPY) {
            // Duplicate of an in-flight block: drop this request and move on.
            Dbg.Assert(curr_rd_req.WbReq == null);
            RequestPool.Enpool(curr_rd_req);
            curr_rd_req = get_req();
            continue;
        }
        // STATS
        collect_inst_stats();
        // Caches
        if (Config.proc.cache_enabled && curr_rd_req.Type != ReqType.COPY) {
            // Check for in-flight rd_wr_q.
            // Since write is duplicate, drop it....
            bool in_rd_wr_q = read_write_q.Contains(curr_rd_req.BlockAddr);
            // L1
            if (l1c.is_cache_hit(curr_rd_req.BlockAddr, curr_rd_req.Type)) {
                Dbg.AssertPrint(!in_rd_wr_q, "Both in rd_wr_q and L1 cache baddr=" + curr_rd_req.BlockAddr);
                // HIT: Add to l1 cache hit queue to model the latency
                add_cache_hit_queue(l1c, curr_rd_req);
                curr_rd_req = get_req();
                issued_rd_req = true;
                continue;
            }
            // L2
            if (l2c.is_cache_hit(curr_rd_req.BlockAddr, curr_rd_req.Type)) {
                Dbg.Assert(!in_rd_wr_q);
                // HIT: Add to l2 cache hit queue to model the latency,
                // add to l1 cache after it is served from the hit queue
                add_cache_hit_queue(l2c, curr_rd_req);
                curr_rd_req = get_req();
                issued_rd_req = true;
                continue;
            }
            // Block already being fetched by a converted write: coalesce with it.
            if (in_rd_wr_q) {
                if (curr_rd_req.Type == ReqType.WRITE) {
                    inst_wnd.set_ready(curr_rd_req.BlockAddr);
                }
                RequestPool.Enpool(curr_rd_req);
                curr_rd_req = get_req();
                issued_rd_req = true;
                continue;
            }
            // If write allocate -- 1. need to make sure the following read request
            // detects this reading request generated from write
            // 2. don't stall the instruction window
            // Make it into a read request, then on receving the
            // request, put them into the cache and mark them dirty.
            if (curr_rd_req.Type == ReqType.WRITE) {
                convert_to_read_write(ref curr_rd_req);
            }
        }
        // **** GO TO MEMORY ****
        //try mshr
        bool mshr_ok = insert_mshr(curr_rd_req);
        if (!mshr_ok) {
            mshr_retry = true;
            return;
        }
        //try memory controller
        bool mctrl_ok = insert_mctrl(curr_rd_req);
        if (!mctrl_ok) {
            mctrl_retry = true;
            return;
        }
        //issued memory request
        issued_rd_req = true;
        //get new read request
        curr_rd_req = get_req();
    }
}
// Constructor: selects a base DDR3/LPDDR3 timing preset by `type`, then applies
// per-parameter overrides. Sentinel conventions differ by parameter: signed
// parameters use -1 for "no override", unsigned ones use 0. Override order
// matters because tRC is recomputed from tRP/tRAS as they change.
public DDR3DRAM(DDR3Enum type, uint clock_factor, int tRTRS, uint tWR, uint tWTR, uint tBL, uint bank_max, uint subarray_max, uint col_max, uint tRA, uint tWA, int tREFI, int tRFC, int tRP, int tRCD) {
    timing = new Timing();
    // Load the base timing preset for the selected device.
    switch (type) {
        case DDR3Enum.DDR3_4Gb_x8_1066_8_8_8:
            DDR3_4Gb_x8_1066_8_8_8();
            break;
        case DDR3Enum.DDR3_4Gb_x8_1333_10_10_10:
            DDR3_4Gb_x8_1333_10_10_10();
            break;
        case DDR3Enum.DDR3_4Gb_x8_1600_11_11_11:
            DDR3_4Gb_x8_1600_11_11_11();
            break;
        case DDR3Enum.DDR3_8Gb_x8_1066_7_7_7:
            DDR3_8Gb_x8_1066_7_7_7();
            break;
        case DDR3Enum.DDR3_8Gb_x8_1333_9_9_9:
            DDR3_8Gb_x8_1333_9_9_9();
            break;
        case DDR3Enum.LPDDR3_8Gb_x8_1333:
            LPDDR3_8Gb_x8_1333();
            break;
        case DDR3Enum.DDR3_8Gb_x8_1600_11_11_11:
            DDR3_8Gb_x8_1600_11_11_11();
            break;
        case DDR3Enum.DDR3_4Gb_x8_1866_13_13_13:
            DDR3_4Gb_x8_1866_13_13_13();
            break;
        default:
            throw new Exception("Invalid DRAM type.");
    }
    // Per-parameter overrides of the preset.
    if (tRTRS != -1) {
        timing.tRTRS = (uint)tRTRS;
    }
    if (tWR != 0) {
        timing.tWR = tWR;
    }
    if (tWTR != 0) {
        timing.tWTR = tWTR;
    }
    if (tREFI != -1) {
        // Parameter given in ns; convert to cycles via tCK.
        timing.tREFI = (ulong)Math.Floor(tREFI / timing.tCK); // in ns
    }
    if (tRFC != -1) {
        timing.tRFC = (uint)Math.Ceiling(tRFC / timing.tCK); // in ns
    }
    if (Config.mem.tRAS != -1) {
        timing.tRAS = (uint)Config.mem.tRAS;
        // tRC depends on tRAS; keep it consistent.
        timing.tRC = timing.tRP + timing.tRAS;
    }
    if (tRP != -1) {
        timing.tRP = (uint)tRP;
        // Recompute tRC with the updated tRP (and possibly updated tRAS above).
        timing.tRC = timing.tRP + timing.tRAS;
    }
    if (tRCD != -1) {
        // Just tRCD without updating tRAS or tRC values
        timing.tRCD = (uint)tRCD;
    }
    // Configurable tFAWs and tRRDs
    if (Config.mem.tFAW != 0) {
        timing.tFAW = Config.mem.tFAW;
        timing.tRRD = Config.mem.tFAW / 5;
    }
    // A longer burst stretches every timing derived from the burst length.
    if (tBL != 0 && tBL > timing.tBL) {
        timing.tBL = tBL;
        /* COL-to-FAKT */
        timing.tRA = timing.tBL;
        timing.tWA = timing.tCWL + timing.tBL + (timing.tWR / 2);
        /* COL-to-PRE */
        timing.tRTP = timing.tBL;
        //tWTP is covered by (tCWL + tBL + tWR)
        /* COL-to-COL */
        timing.tCCD = timing.tBL;
        timing.tRTW = timing.tCL - timing.tCWL + timing.tBL + 2;
        //tWTR is covered by (tCWL + tBL + tWTR)
    }
    if (bank_max != 0) {
        BANK_MAX = bank_max;
    }
    if (subarray_max != 0) {
        SUBARRAYS_PER_BANK = subarray_max;
    } else {
        Dbg.AssertPrint(Config.mctrl.salp == SALP.NONE, "The number of subarray needs to be > 0 in SALP mode.");
    }
    if (col_max != 0) {
        COL_MAX = col_max;
    }
    if (tRA != 0) {
        timing.tRA = tRA;
    }
    if (tWA != 0) {
        timing.tWA = tWA;
    }
    // Scale the memory latency with respective to the processor's frequency
    timing.Scale(clock_factor, COL_MAX);
}