Esempio n. 1
0
 // Dispatches one packet node of a transaction's DAG. Exactly one of the following happens:
 //  - the packet still carries a delay: the delay is cleared and this method is re-run at
 //    CurrentRound + delay;
 //  - pkt.send is set: the packet is handed to send_noc;
 //  - pkt.mem is set: the packet is handed to access_mem;
 //  - otherwise: the packet completes immediately through pkt_callback.
 // In the send/mem cases pkt_callback is passed along as the completion callback.
 void send_pkt(CmpCache_Txn txn, CmpCache_Pkt pkt)
 {
     if (pkt.delay > 0)
     {
         // consume the delay first so that the deferred re-entry falls through to a dispatch
         ulong wake_round = Simulator.CurrentRound + pkt.delay;
         pkt.delay = 0;
         Simulator.Defer(delegate() { send_pkt(txn, pkt); }, wake_round);
         return;
     }

     if (pkt.send)
     {
         send_noc(txn.node, pkt.from, pkt.to, pkt.flits,
                  delegate() { pkt_callback(txn, pkt); },
                  pkt.off_crit, pkt.vc_class);
         return;
     }

     if (pkt.mem)
     {
         access_mem(pkt.mem_requestor, pkt.mem_addr, pkt.mem_write,
                    delegate() { pkt_callback(txn, pkt); });
         return;
     }

     // neither a network packet nor a memory access: nothing to wait for
     pkt_callback(txn, pkt);
 }
Esempio n. 2
0
        // Creates a packet node for txn's dependency DAG and registers it with the transaction.
        //  - data selects the flit count: m_datapkt_size when true, 1 otherwise.
        //  - send and done are stored on the packet unchanged.
        // Both of the transaction's packet counters are incremented, and the first packet ever
        // created for a transaction is remembered in txn.pkts.
        CmpCache_Pkt _add_pkt(CmpCache_Txn txn, int from, int to, bool data, bool send, bool done)
        {
            Debug.Assert(to >= 0 && to < m_N);

            CmpCache_Pkt fresh = new CmpCache_Pkt
            {
                wakeup   = new List<CmpCache_Pkt>(),
                id       = pkt_id++,
                from     = from,
                to       = to,
                txn      = txn,
                flits    = data ? m_datapkt_size : 1,
                vc_class = 0, // placeholder: filled in once the DAG is complete
                done     = done,
                send     = send,
                deps     = 0,
                delay    = 0,
                mem_addr = 0
            };

            txn.n_pkts++;
            txn.n_pkts_remaining++;

            if (txn.pkts == null)
            {
                txn.pkts = fresh;
            }

            return fresh;
        }
Esempio n. 3
0
 // Shared work-list for assignVCclasses; kept as a field to avoid allocating one per call.
 private Queue<CmpCache_Pkt> workQ = new Queue<CmpCache_Pkt>();

 // Assigns VC classes over the packet DAG reachable from root, using a work-list:
 //  - a packet with more than one flit is raised to class 4 at least;
 //  - a packet with send set pushes its successors to at least its own class plus one;
 //  - any other node pushes its successors to at least its own class, so that the
 //    dependence information still propagates through it;
 //  - a successor is (re)queued only when its class was actually raised.
 void assignVCclasses(CmpCache_Pkt root)
 {
     workQ.Enqueue(root);

     while (workQ.Count > 0)
     {
         CmpCache_Pkt cur = workQ.Dequeue();

         if (cur.flits > 1 && cur.vc_class < 4)
         {
             cur.vc_class = 4;
         }

         int floor = cur.vc_class;
         if (cur.send)
         {
             floor = cur.vc_class + 1;
         }

         foreach (CmpCache_Pkt next in cur.wakeup)
         {
             if (next.vc_class < floor)
             {
                 next.vc_class = floor;
                 workQ.Enqueue(next);
             }
         }
     }
 }
Esempio n. 4
0
        // Builds the invalidation sub-graph for addr: for every owner recorded in state other
        // than node, an invalidate packet (shared slice -> owner, delay m_shdelay) followed by a
        // response (owner -> node, delay m_prvdelay). The response is a data packet for the owner
        // equal to c2c and a control packet for everyone else. All invalidates depend on init_dep
        // and all responses feed the join-point that is returned. Each such owner's private
        // cache is also invalidated for addr.
        CmpCache_Pkt do_inval(CmpCache_Txn txn, CmpCache_State state, CmpCache_Pkt init_dep, int node, ulong addr, int c2c)
        {
            int home_slice = map_addr(node, addr);

            // virtual packet created with the done flag set: the completion point
            CmpCache_Pkt join = add_joinpt(txn, true);

            for (int owner = 0; owner < m_N; owner++)
            {
                if (!state.owners.is_set(owner) || owner == node)
                {
                    continue;
                }

                CmpCache_Pkt request = add_ctl_pkt(txn, home_slice, owner, false);
                request.delay = m_shdelay;

                CmpCache_Pkt reply;
                if (c2c == owner)
                {
                    reply = add_data_pkt(txn, owner, node, false);
                }
                else
                {
                    reply = add_ctl_pkt(txn, owner, node, false);
                }
                reply.delay = m_prvdelay;

                add_dep(init_dep, request);
                add_dep(request, reply);
                add_dep(reply, join);

                // drop the block from this owner's private cache
                bool dropped_data;
                m_prv[owner].inval(addr, out dropped_data);
            }

            return join;
        }
Esempio n. 5
0
        // Handles the eviction of evict_addr from node: emits one packet from node to the
        // block's shared slice (a data packet for a writeback, a control packet for a release),
        // hung off init_dep with delay m_opdelay, and updates the block's coherence state.
        // wb reports whether the writeback form was used, i.e. node was the exclusive owner
        // and the block was modified.
        void do_evict(CmpCache_Txn txn, CmpCache_Pkt init_dep, int node, ulong evict_addr, out bool wb)
        {
            ulong blk      = evict_addr >> m_blkshift;
            int   sh_slice = map_addr(node, evict_addr);

            // look up the coherence state of the evicted block
            CmpCache_State line_st;
            if (!m_sh_perfect)
            {
                bool hit = m_sh.probe(evict_addr, out line_st);
                Debug.Assert(hit); // inclusive sh cache -- MUST be present in sh cache
            }
            else
            {
                Debug.Assert(m_perf_sh.ContainsKey(blk));
                line_st = m_perf_sh[blk];
            }

            bool dirty_owner = line_st.excl == node && line_st.modified;

            // pass-through delay: operation already in progress
            CmpCache_Pkt notice = dirty_owner
                ? add_data_pkt(txn, node, sh_slice, false)
                : add_ctl_pkt(txn, node, sh_slice, false);
            notice.delay = m_opdelay;
            add_dep(init_dep, notice);

            if (dirty_owner)
            {
                // writeback: no private owners remain, shared copy becomes dirty
                line_st.owners.reset();
                line_st.excl     = -1;
                line_st.sh_dirty = true;
            }
            else
            {
                // release: only this node leaves the owner set
                line_st.owners.unset(node);
                if (line_st.excl == node)
                {
                    line_st.excl = -1;
                }
            }
            wb = dirty_owner;

            // the perfect shared cache only tracks blocks that still have an owner
            if (m_sh_perfect && !line_st.owners.any_set())
            {
                m_perf_sh.Remove(blk);
            }
        }
Esempio n. 6
0
        // Completion handler for a packet node: decrements the transaction's remaining-packet
        // count, invokes txn.cb if the packet carries the done flag, and then releases every
        // packet waiting on this one. A waiter is marked off_crit when this packet is done or
        // is itself off_crit, and is dispatched once its dependency count reaches zero.
        void pkt_callback(CmpCache_Txn txn, CmpCache_Pkt pkt)
        {
            txn.n_pkts_remaining--;

            if (pkt.done)
            {
                txn.cb();
            }

            foreach (CmpCache_Pkt waiter in pkt.wakeup)
            {
                bool past_completion = pkt.done || pkt.off_crit;
                if (past_completion)
                {
                    waiter.off_crit = true;
                }

                if (--waiter.deps == 0)
                {
                    send_pkt(txn, waiter);
                }
            }
        }
Esempio n. 7
0
 // Records a DAG edge from -> to: `to` is appended to the list of packets woken by `from`,
 // and the number of dependencies `to` is waiting on goes up by one.
 void add_dep(CmpCache_Pkt from, CmpCache_Pkt to)
 {
     from.wakeup.Add(to);
     to.deps += 1;
 }
Esempio n. 8
0
 // Convenience overload: builds the invalidation packets for addr (all depending on init_dep
 // and meeting at the returned join-point) and invalidates addr in the other private caches.
 // It forwards -1 as the c2c argument, which matches no node index.
 CmpCache_Pkt do_inval(CmpCache_Txn txn, CmpCache_State state, CmpCache_Pkt init_dep, int node, ulong addr)
 {
     const int no_c2c_source = -1;

     return do_inval(txn, state, init_dep, node, addr, no_c2c_source);
 }
Esempio n. 9
0
        // Models node giving up evict_addr. Depending on the block's coherence state this builds
        // either a writeback (data packet) or a release (control packet) from node to the block's
        // shared slice, made dependent on init_dep, and updates the state to match.
        // wb is set to true for the writeback case and false for the release case.
        void do_evict(CmpCache_Txn txn, CmpCache_Pkt init_dep, int node, ulong evict_addr, out bool wb)
        {
            int home = map_addr(node, evict_addr);
            ulong blk = evict_addr >> m_blkshift;

            CmpCache_State st;
            if (m_sh_perfect)
            {
                Debug.Assert(m_perf_sh.ContainsKey(blk));
                st = m_perf_sh[blk];
            }
            else
            {
                // inclusive sh cache -- MUST be present in sh cache
                bool hit = m_sh.probe(evict_addr, out st);
                Debug.Assert(hit);
            }

            bool holds_dirty = (st.excl == node) && st.modified;
            if (!holds_dirty)
            {
                // release: tell the shared slice this node no longer holds the block
                CmpCache_Pkt rel = add_ctl_pkt(txn, node, home, false);
                rel.delay = m_opdelay;
                add_dep(init_dep, rel);

                st.owners.unset(node);
                if (st.excl == node)
                {
                    st.excl = -1;
                }

                wb = false;
            }
            else
            {
                // writeback: data travels to the shared slice
                CmpCache_Pkt wback = add_data_pkt(txn, node, home, false);
                wback.delay = m_opdelay; // pass-through delay: operation already in progress
                add_dep(init_dep, wback);

                st.owners.reset();
                st.excl = -1;
                st.sh_dirty = true;

                wb = true;
            }

            if (m_sh_perfect && !st.owners.any_set())
            {
                m_perf_sh.Remove(blk);
            }
        }
Esempio n. 10
0
        // Reused across calls so that assignVCclasses does not allocate a queue each time.
        private Queue<CmpCache_Pkt> workQ = new Queue<CmpCache_Pkt>();

        // Work-list pass over the packet DAG starting at root that fills in vc_class:
        //  - a multi-flit packet gets class 4 or higher;
        //  - successors of a packet with send set get at least that packet's class + 1;
        //  - successors of any other node get at least that node's class, which carries the
        //    dependence information through it;
        //  - a successor goes back on the work-list whenever its class has been raised.
        void assignVCclasses(CmpCache_Pkt root)
        {
            workQ.Enqueue(root);
            while (workQ.Count > 0)
            {
                CmpCache_Pkt head = workQ.Dequeue();

                if (head.flits > 1)
                {
                    head.vc_class = Math.Max(4, head.vc_class);
                }

                int min_succ_class = head.vc_class + (head.send ? 1 : 0);

                foreach (CmpCache_Pkt child in head.wakeup)
                {
                    int before = child.vc_class;
                    child.vc_class = Math.Max(min_succ_class, before);

                    bool raised = child.vc_class > before;
                    if (raised)
                    {
                        workQ.Enqueue(child);
                    }
                }
            }
        }
Esempio n. 11
0
        // Called when pkt has finished. Updates the transaction's remaining-packet count, runs
        // txn.cb when pkt is flagged done, then walks pkt.wakeup: each dependent inherits
        // off_crit if pkt is done or off_crit, has its dependency count decremented, and is
        // sent as soon as that count hits zero.
        void pkt_callback(CmpCache_Txn txn, CmpCache_Pkt pkt)
        {
            txn.n_pkts_remaining--;

            if (pkt.done)
            {
                txn.cb();
            }

            foreach (CmpCache_Pkt succ in pkt.wakeup)
            {
                if (pkt.done || pkt.off_crit)
                {
                    succ.off_crit = true;
                }

                succ.deps -= 1;
                if (succ.deps != 0)
                {
                    continue; // still waiting on other predecessors
                }

                send_pkt(txn, succ);
            }
        }
Esempio n. 12
0
 // Starts a packet whose dependencies are satisfied.
 //  1. A positive pkt.delay is honored first: it is zeroed and send_pkt is deferred to
 //     CurrentRound + delay.
 //  2. A packet with send set goes to send_noc.
 //  3. A packet with mem set goes to access_mem.
 //  4. Anything else is completed on the spot.
 // pkt_callback is the completion handler in cases 2-4.
 void send_pkt(CmpCache_Txn txn, CmpCache_Pkt pkt)
 {
     bool delayed = pkt.delay > 0;
     if (delayed)
     {
         ulong fire_at = Simulator.CurrentRound + pkt.delay;
         pkt.delay = 0;
         Simulator.Defer(delegate()
         {
             send_pkt(txn, pkt);
         }, fire_at);
         return;
     }

     if (pkt.send)
     {
         send_noc(txn.node, pkt.from, pkt.to, pkt.flits,
                  delegate()
                  {
                      pkt_callback(txn, pkt);
                  },
                  pkt.off_crit, pkt.vc_class);
     }
     else if (pkt.mem)
     {
         access_mem(pkt.mem_requestor, pkt.mem_addr, pkt.mem_write,
                    delegate()
                    {
                        pkt_callback(txn, pkt);
                    });
     }
     else
     {
         pkt_callback(txn, pkt);
     }
 }
Esempio n. 13
0
 // Makes `to` depend on `from`: `from` will wake `to` when it completes, and `to` counts
 // one more outstanding dependency.
 void add_dep(CmpCache_Pkt from, CmpCache_Pkt to)
 {
     List<CmpCache_Pkt> woken_by_from = from.wakeup;
     woken_by_from.Add(to);

     to.deps++;
 }
Esempio n. 14
0
        // Allocates and initializes one packet node belonging to txn.
        // The packet gets the next id from pkt_id, an empty wakeup list, zero dependencies,
        // zero delay and a zero memory address. Its flit count is m_datapkt_size when data is
        // set and 1 otherwise. The transaction's packet counters are incremented, and txn.pkts
        // is pointed at this packet if it was still null.
        CmpCache_Pkt _add_pkt(CmpCache_Txn txn, int from, int to, bool data, bool send, bool done)
        {
            Debug.Assert(to >= 0 && to < m_N);

            CmpCache_Pkt p = new CmpCache_Pkt();

            // identity and endpoints
            p.wakeup = new List<CmpCache_Pkt>();
            p.id = pkt_id++;
            p.from = from;
            p.to = to;
            p.txn = txn;

            // size and (provisional) VC class; the class gets filled in once DAG is complete
            if (data)
            {
                p.flits = m_datapkt_size;
            }
            else
            {
                p.flits = 1;
            }
            p.vc_class = 0;

            // role flags
            p.done = done;
            p.send = send;

            // scheduling state
            p.deps = 0;
            p.delay = 0;
            p.mem_addr = 0;

            // transaction bookkeeping
            txn.n_pkts++;
            txn.n_pkts_remaining++;
            if (txn.pkts == null)
            {
                txn.pkts = p;
            }

            return p;
        }
Esempio n. 15
0
        // Invalidates addr in every private cache that state lists as an owner, except node's,
        // and builds the matching packets: shared slice -> owner (invalidate, delay m_shdelay),
        // then owner -> node (response, delay m_prvdelay). The owner whose index equals c2c
        // answers with a data packet, all others with a control packet. Invalidates depend on
        // init_dep; responses converge on the returned join-point, which carries the done flag.
        CmpCache_Pkt do_inval(CmpCache_Txn txn, CmpCache_State state, CmpCache_Pkt init_dep, int node, ulong addr, int c2c)
        {
            int sh_slice = map_addr(node, addr);

            // join-point (virtual packet). this is the completion point (DONE flag)
            CmpCache_Pkt all_acked = add_joinpt(txn, true);

            for (int sharer = 0; sharer < m_N; sharer++)
            {
                bool must_inval = state.owners.is_set(sharer) && sharer != node;
                if (must_inval)
                {
                    CmpCache_Pkt inval_req = add_ctl_pkt(txn, sh_slice, sharer, false);
                    inval_req.delay = m_shdelay;

                    bool supplies_data = (c2c == sharer);
                    CmpCache_Pkt inval_ack = supplies_data
                        ? add_data_pkt(txn, sharer, node, false)
                        : add_ctl_pkt(txn, sharer, node, false);
                    inval_ack.delay = m_prvdelay;

                    add_dep(init_dep, inval_req);
                    add_dep(inval_req, inval_ack);
                    add_dep(inval_ack, all_acked);

                    // invalidate in this prv cache.
                    bool had_data;
                    m_prv[sharer].inval(addr, out had_data);
                }
            }

            return all_acked;
        }
Esempio n. 16
0
 // Builds a set of invalidation packets that all depend on init_dep and meet at the
 // join-point returned to the caller, and invalidates addr in the other private caches.
 // This overload passes -1 as c2c, so no owner index can match it.
 CmpCache_Pkt do_inval(CmpCache_Txn txn, CmpCache_State state, CmpCache_Pkt init_dep, int node, ulong addr)
 {
     CmpCache_Pkt join = do_inval(txn, state, init_dep, node, addr, -1);

     return join;
 }
Esempio n. 17
0
        // Performs one functional cache access by `node` to `addr` and schedules its timing.
        //
        // Probes node's private cache and the shared cache (m_perf_sh when m_sh_perfect is set,
        // m_sh otherwise), updates the coherence state, and -- unless the access is satisfied
        // locally -- builds a DAG of packets on a new CmpCache_Txn.
        //
        // cb is deferred directly by m_prvdelay rounds when no transaction is needed; otherwise
        // it is stored in txn.cb and start_pkts(txn) is deferred by the same amount.
        //
        // out-params:
        //   L1hit    - the private-cache probe hit.
        //   L1upgr   - private-cache hit while this node is not the recorded exclusive owner
        //              (computed for reads and writes alike).
        //   L1ev     - inserting into the private cache evicted a block.
        //   L1wb     - value reported by do_evict for that private-cache eviction.
        //   L2access - data is supplied by the shared slice because no private cache holds the
        //              block, or the block missed in both caches.
        //   L2hit    - the shared-cache probe hit (always true when m_sh_perfect is set).
        //   L2ev     - inserting into the shared cache evicted a block.
        //   L2wb     - the block evicted from the shared cache was dirty and a memory writeback
        //              was added.
        //   c2c      - a transfer sourced from another private cache was modeled.
        public void access(int node, ulong addr, bool write, Simulator.Ready cb,
                           out bool L1hit, out bool L1upgr, out bool L1ev, out bool L1wb,
                           out bool L2access, out bool L2hit, out bool L2ev, out bool L2wb, out bool c2c)
        {
            // txn stays null when the access needs no protocol interaction (case 1a below)
            CmpCache_Txn txn      = null;
            int          sh_slice = map_addr(node, addr);

            // ------------- first, we probe the cache (private, and shared if necessary) to
            //               determine current state.

            // probe private cache
            CmpCache_State state;
            bool           prv_state;
            bool           prv_hit = m_prv[node].probe(addr, out prv_state);

            bool sh_hit = false;

            if (m_sh_perfect)
            {
                // perfect shared cache: every probe hits; a block's state is created on first touch
                ulong blk = addr >> m_blkshift;
                sh_hit = true;
                if (m_perf_sh.ContainsKey(blk))
                {
                    state = m_perf_sh[blk];
                }
                else
                {
                    state          = new CmpCache_State();
                    m_perf_sh[blk] = state;
                }
            }
            else
            {
                sh_hit = m_sh.probe(addr, out state);
            }

            // exclusivity is only meaningful when the shared probe produced a state
            bool prv_excl = sh_hit ? (state.excl == node) : false;

            if (prv_hit)
            {
                // we always update the timestamp on the private cache
                m_prv[node].update(addr, Simulator.CurrentRound);
            }

            // out-params
            L1hit    = prv_hit;
            L1upgr   = L1hit && !prv_excl;
            L2hit    = sh_hit;
            c2c      = false; // will be set below for appropriate cases
            L1ev     = false; // will be set below
            L1wb     = false; // will be set below
            L2ev     = false; // will be set below
            L2wb     = false; // will be set below
            L2access = false; // will be set below

            // ----------------- now, we execute one of four cases:
            //                   1a. present in private cache, with appropriate ownership.
            //                   1b. present in private cache, but not excl (for a write)
            //                   2. not present in private cache, but in shared cache.
            //                   3. not present in private or shared cache.
            //
            // in each case, we update functional state and generate the packet DAG as we go.

            if (prv_hit && (!write || prv_excl)) // CASE 1a: present in prv cache, have excl if write
            {
                // just set modified-bit in state, then we're done (no protocol interaction)
                if (write)
                {
                    state.modified = true;
                }
            }
            else if (prv_hit && write && !prv_excl) // CASE 1b: present in prv cache, need upgr
            {
                txn      = new CmpCache_Txn();
                txn.node = node;

                // request packet
                CmpCache_Pkt req_pkt  = add_ctl_pkt(txn, node, sh_slice, false);
                // done_pkt is assigned in both branches below but not read again in this case
                CmpCache_Pkt done_pkt = null;

                // present in others?
                if (state.owners.others_set(node))
                {
                    done_pkt = do_inval(txn, state, req_pkt, node, addr);
                }
                else
                {
                    // not present in others, but we didn't have excl -- send empty grant
                    // (could happen if others have evicted and we are the only one left)
                    done_pkt       = add_ctl_pkt(txn, sh_slice, node, true);
                    done_pkt.delay = m_shdelay;
                    add_dep(req_pkt, done_pkt);
                }

                // after the upgrade this node is the sole, exclusive, modified owner
                state.owners.reset();
                state.owners.set(node);
                state.excl     = node;
                state.modified = true;
            }
            else if (!prv_hit && sh_hit) // CASE 2: not in prv cache, but in sh cache
            {
                txn      = new CmpCache_Txn();
                txn.node = node;

                // update functional shared state
                if (!m_sh_perfect)
                {
                    m_sh.update(addr, Simulator.CurrentRound);
                }

                // request packet
                CmpCache_Pkt req_pkt  = add_ctl_pkt(txn, node, sh_slice, false);
                // done_pkt ends up as the packet that delivers the block to node; a private-cache
                // eviction (below) is made dependent on it
                CmpCache_Pkt done_pkt = null;

                if (state.owners.any_set())   // in other caches?
                {
                    if (write)                // need to invalidate?
                    {
                        if (state.excl != -1) // someone else has exclusive -- c-to-c xfer
                        {
                            c2c = true;       // out-param

                            CmpCache_Pkt xfer_req = add_ctl_pkt(txn, sh_slice, state.excl, false);
                            CmpCache_Pkt xfer_dat = add_data_pkt(txn, state.excl, node, true);
                            done_pkt = xfer_dat;

                            xfer_req.delay = m_shdelay;
                            xfer_dat.delay = m_prvdelay;

                            add_dep(req_pkt, xfer_req);
                            add_dep(xfer_req, xfer_dat);

                            bool evicted_state;
                            m_prv[state.excl].inval(addr, out evicted_state);
                        }
                        else // others have it -- inval to all, c-to-c from closest
                        {
                            int close = closest(node, state.owners);
                            if (close != -1)
                            {
                                c2c = true;              // out-param
                            }
                            // with close == -1 no owner matches, so do_inval builds control responses only
                            done_pkt = do_inval(txn, state, req_pkt, node, addr, close);
                        }

                        // for a write, we need exclusive -- update state
                        state.owners.reset();
                        state.owners.set(node);
                        state.excl     = node;
                        state.modified = true;
                    }
                    else // just a read -- joining sharer set, c-to-c from closest
                    {
                        if (state.excl != -1)
                        {
                            CmpCache_Pkt xfer_req = add_ctl_pkt(txn, sh_slice, state.excl, false);
                            CmpCache_Pkt xfer_dat = add_data_pkt(txn, state.excl, node, true);
                            done_pkt = xfer_dat;

                            c2c = true; // out-param

                            xfer_req.delay = m_shdelay;
                            xfer_dat.delay = m_prvdelay;

                            add_dep(req_pkt, xfer_req);
                            add_dep(xfer_req, xfer_dat);

                            // downgrade must also trigger writeback
                            if (state.modified)
                            {
                                CmpCache_Pkt wb_dat = add_data_pkt(txn, state.excl, sh_slice, false);
                                add_dep(xfer_req, wb_dat);
                                state.modified = false;
                                state.sh_dirty = true;
                            }
                        }
                        else
                        {
                            int close = closest(node, state.owners);
                            if (close != -1)
                            {
                                c2c = true;              // out-param
                            }
                            // NOTE(review): close is used as a packet endpoint below even when it is
                            // -1, while _add_pkt asserts to >= 0 (presumably add_ctl_pkt forwards to
                            // it). Presumably closest() cannot return -1 while owners.any_set()
                            // holds -- TODO confirm.
                            CmpCache_Pkt xfer_req = add_ctl_pkt(txn, sh_slice, close, false);
                            CmpCache_Pkt xfer_dat = add_data_pkt(txn, close, node, true);
                            done_pkt = xfer_dat;

                            xfer_req.delay = m_shdelay;
                            xfer_dat.delay = m_prvdelay;

                            add_dep(req_pkt, xfer_req);
                            add_dep(xfer_req, xfer_dat);
                        }

                        // a read joins the sharer set; nobody is exclusive afterwards
                        state.owners.set(node);
                        state.excl = -1;
                    }
                }
                else
                {
                    // not in other prv caches, need to get from shared slice
                    L2access = true;

                    CmpCache_Pkt dat_resp = add_data_pkt(txn, sh_slice, node, true);
                    done_pkt = dat_resp;

                    add_dep(req_pkt, done_pkt);

                    dat_resp.delay = m_shdelay;

                    // sole holder: granted exclusive even for a read; modified only for a write
                    state.owners.reset();
                    state.owners.set(node);
                    state.excl     = node;
                    state.modified = write;
                }

                // insert into private cache, get evicted block (if any)
                ulong evict_addr;
                bool  evict_data;
                bool  evicted = m_prv[node].insert(addr, true, out evict_addr, out evict_data, Simulator.CurrentRound);

                // add either a writeback or a release packet
                if (evicted)
                {
                    L1ev = true;
                    do_evict(txn, done_pkt, node, evict_addr, out L1wb);
                }
            }
            else if (!prv_hit && !sh_hit) // CASE 3: not in prv or shared cache
            {
                // here, we need to go to memory
                Debug.Assert(!m_sh_perfect);

                txn      = new CmpCache_Txn();
                txn.node = node;

                L2access = true;

                // request packet
                CmpCache_Pkt req_pkt = add_ctl_pkt(txn, node, sh_slice, false);

                // cache response packet
                CmpCache_Pkt resp_pkt = add_data_pkt(txn, sh_slice, node, true);
                resp_pkt.delay = m_opdelay; // req already active -- just a pass-through op delay here

                // memory request packet
                int          mem_slice  = map_addr_mem(node, addr);
                CmpCache_Pkt memreq_pkt = add_ctl_pkt(txn, sh_slice, mem_slice, false);
                memreq_pkt.delay = m_shdelay;

                // memory-access virtual node
                // (built as a control packet, then switched from a network send to a memory access)
                CmpCache_Pkt mem_access = add_ctl_pkt(txn, 0, 0, false);
                mem_access.send          = false;
                mem_access.mem           = true;
                mem_access.mem_addr      = addr;
                mem_access.mem_write     = false; // cache-line fill
                mem_access.mem_requestor = node;

                // memory response packet
                CmpCache_Pkt memresp_pkt = add_data_pkt(txn, mem_slice, sh_slice, false);

                // connect up the critical path first
                add_dep(req_pkt, memreq_pkt);
                add_dep(memreq_pkt, mem_access);
                add_dep(mem_access, memresp_pkt);
                add_dep(memresp_pkt, resp_pkt);

                // now, handle replacement in the shared cache...
                CmpCache_State new_state = new CmpCache_State();

                new_state.owners.reset();
                new_state.owners.set(node);
                new_state.excl     = node;
                new_state.modified = write;
                new_state.sh_dirty = false;

                ulong          sh_evicted_addr;
                CmpCache_State sh_evicted_state;
                bool           evicted = m_sh.insert(addr, new_state, out sh_evicted_addr, out sh_evicted_state, Simulator.CurrentRound);

                if (evicted)
                {
                    // shared-cache eviction (different from the private-cache evictions elsewhere):
                    // we must evict any private-cache copies, because we model an inclusive hierarchy.

                    L2ev = true;

                    // join-point created without the done flag: off the requester's completion path
                    CmpCache_Pkt prv_evict_join = add_joinpt(txn, false);

                    if (sh_evicted_state.excl != -1) // evicted block lives only in one prv cache
                    {
                        // invalidate request to prv cache before sh cache does eviction
                        CmpCache_Pkt prv_invl = add_ctl_pkt(txn, sh_slice, sh_evicted_state.excl, false);
                        add_dep(memresp_pkt, prv_invl);
                        CmpCache_Pkt prv_wb;

                        prv_invl.delay = m_opdelay;

                        if (sh_evicted_state.modified)
                        {
                            // writeback
                            prv_wb       = add_data_pkt(txn, sh_evicted_state.excl, sh_slice, false);
                            prv_wb.delay = m_prvdelay;
                            // marks the evicted block dirty so the memory writeback below is generated
                            sh_evicted_state.sh_dirty = true;
                        }
                        else
                        {
                            // simple ACK
                            prv_wb       = add_ctl_pkt(txn, sh_evicted_state.excl, sh_slice, false);
                            prv_wb.delay = m_prvdelay;
                        }

                        add_dep(prv_invl, prv_wb);
                        add_dep(prv_wb, prv_evict_join);

                        bool prv_evicted_dat;
                        m_prv[sh_evicted_state.excl].inval(sh_evicted_addr, out prv_evicted_dat);
                    }
                    else if (sh_evicted_state.owners.any_set()) // evicted block has greater-than-one sharer set
                    {
                        // one invalidate/ack pair per sharer, all gated on the memory response
                        for (int i = 0; i < m_N; i++)
                        {
                            if (sh_evicted_state.owners.is_set(i))
                            {
                                CmpCache_Pkt prv_invl = add_ctl_pkt(txn, sh_slice, i, false);
                                CmpCache_Pkt prv_ack  = add_ctl_pkt(txn, i, sh_slice, false);

                                prv_invl.delay = m_opdelay;
                                prv_ack.delay  = m_prvdelay;

                                add_dep(memresp_pkt, prv_invl);
                                add_dep(prv_invl, prv_ack);
                                add_dep(prv_ack, prv_evict_join);

                                bool prv_evicted_dat;
                                m_prv[i].inval(sh_evicted_addr, out prv_evicted_dat);
                            }
                        }
                    }
                    else // evicted block has no owners (was only in shared cache)
                    {
                        add_dep(memresp_pkt, prv_evict_join);
                    }

                    // now writeback to memory, if we were dirty
                    if (sh_evicted_state.sh_dirty)
                    {
                        CmpCache_Pkt mem_wb = add_data_pkt(txn, sh_slice, mem_slice, false);
                        mem_wb.delay = m_opdelay;
                        add_dep(prv_evict_join, mem_wb);
                        // virtual node for the memory write itself, same construction as mem_access above
                        CmpCache_Pkt mem_wb_op = add_ctl_pkt(txn, 0, 0, false);
                        mem_wb_op.send          = false;
                        mem_wb_op.mem           = true;
                        mem_wb_op.mem_addr      = sh_evicted_addr;
                        mem_wb_op.mem_write     = true;
                        mem_wb_op.mem_requestor = node;
                        add_dep(mem_wb, mem_wb_op);
                        L2wb = true;
                    }
                }

                // ...and insert and handle replacement in the private cache
                ulong evict_addr;
                bool  evict_data;
                bool  prv_evicted = m_prv[node].insert(addr, true, out evict_addr, out evict_data, Simulator.CurrentRound);

                // add either a writeback or a release packet
                if (prv_evicted)
                {
                    L1ev = true;
                    do_evict(txn, resp_pkt, node, evict_addr, out L1wb);
                }
            }
            else // shouldn't happen.
            {
                Debug.Assert(false);
            }

            // now start the transaction, if one was needed
            if (txn != null)
            {
                txn.cb = cb;

                // VC classes can only be assigned now that the whole DAG has been built
                assignVCclasses(txn.pkts);

                // start running the protocol DAG after the private-cache delay.
                // NOTE(review): the original comment here was an unfinished sentence ("It may be an
                // empty graph (for a silent upgr), in which case the deferred start (after cache
                // delay)"); every branch above that creates a txn also adds at least a request
                // packet, so the graph does not look empty in this version -- TODO confirm intent.
                Simulator.Defer(delegate()
                {
                    start_pkts(txn);
                }, Simulator.CurrentRound + m_prvdelay);
            }
            // no transaction -- just the cache access delay. schedule deferred callback.
            else
            {
                Simulator.Defer(cb, Simulator.CurrentRound + m_prvdelay);
            }
        }
Esempio n. 18
0
        /// <summary>
        /// Performs one cache access on behalf of <paramref name="node"/>: probes the private and
        /// shared caches, updates the functional coherence state, builds the DAG of protocol
        /// packets required by the access (if any) and schedules it.
        /// </summary>
        /// <remarks>
        /// Exactly one of four cases is executed:
        /// 1a. hit in the private cache with sufficient ownership (no transaction is created);
        /// 1b. hit in the private cache on a write without exclusive ownership (upgrade);
        /// 2.  miss in the private cache, hit in the shared cache;
        /// 3.  miss in both (goes to memory).
        /// GPU nodes, HWA nodes and runs with <c>Config.is_through_all_cache</c> set are forced
        /// to miss in both caches and therefore always take case 3.
        /// Packets must be created in the order written here: the first packet added to a
        /// transaction becomes its root (<c>txn.pkts</c>) and packet ids are handed out sequentially.
        /// </remarks>
        /// <param name="node">Index of the requesting node (indexes <c>m_prv</c> and <c>Simulator.network.nodes</c>).</param>
        /// <param name="addr">Address being accessed.</param>
        /// <param name="write">True for a write, false for a read.</param>
        /// <param name="cb">Completion callback. Stored in the transaction when one is created;
        /// otherwise deferred directly by <c>m_prvdelay</c> rounds.</param>
        /// <param name="L1hit">Set to whether the private-cache probe hit (always false for GPU/HWA/bypass requesters).</param>
        /// <param name="L1upgr">Set to <c>L1hit</c> and not exclusively owned by <paramref name="node"/>.</param>
        /// <param name="L1ev">Set to true when inserting into the private cache evicted a block.</param>
        /// <param name="L1wb">False unless a private-cache eviction occurred, in which case the value is produced by <c>do_evict</c>.</param>
        /// <param name="L2access">Set to true when the data response is served through the shared slice
        /// (case 2 with no private owners, or the regular case 3 path).</param>
        /// <param name="L2hit">Set to whether the shared cache hit (always false for GPU/HWA/bypass requesters).</param>
        /// <param name="L2ev">Set to true when inserting into the shared cache evicted a block.</param>
        /// <param name="L2wb">Set to true when an evicted shared-cache block was dirty and is written back to memory.</param>
        /// <param name="c2c">Set to true when the data is supplied by another private cache.</param>
        public void access(int node, ulong addr, bool write, Simulator.Ready cb,
                           out bool L1hit, out bool L1upgr, out bool L1ev, out bool L1wb,
                           out bool L2access, out bool L2hit, out bool L2ev, out bool L2wb, out bool c2c)
        {
            CmpCache_Txn txn      = null;
            int          sh_slice = map_addr(node, addr);
            bool         sh_hit   = false;

            // probe private cache
            CmpCache_State state = new CmpCache_State();
            bool           prv_state;
            bool           prv_hit = m_prv[node].probe(addr, out prv_state);

            // Classify the requester once instead of re-walking Simulator.network.nodes[node].cpu
            // at every use below (assumes is_GPU()/is_HWA() are side-effect-free queries).
            bool is_gpu        = Simulator.network.nodes[node].cpu.is_GPU();
            bool bypass_caches = Simulator.network.nodes[node].cpu.is_HWA() ||
                                 Config.is_through_all_cache;

            if (is_gpu || bypass_caches)
            {
                // GPU (TODO: need to fix this) and HWA / through-all-cache requesters are
                // treated as missing in both cache levels, whatever the probe returned.
                prv_hit = false;
                sh_hit  = false;
            }
            else if (m_sh_perfect)
            {
                // ------------- probe the (perfect) shared cache to determine current state.
                // A perfect shared cache always hits; the state of a block seen for the
                // first time is the freshly constructed one, which is then remembered.
                ulong blk = addr >> m_blkshift;
                sh_hit = true;

                CmpCache_State known_state;
                if (m_perf_sh.TryGetValue(blk, out known_state))
                {
                    state = known_state;
                }
                else
                {
                    m_perf_sh[blk] = state;
                }
            }
            else
            {
                // ------------- probe the real shared cache to determine current state.
                sh_hit = m_sh.probe(addr, out state);
            }

            bool prv_excl = sh_hit ? (state.excl == node) : false;

            if (prv_hit)
            {
                // we always update the timestamp on the private cache
                m_prv[node].update(addr, Simulator.CurrentRound);
            }

            // out-params
            // NOTE(review): L1upgr does not depend on `write`, so it is also reported for read
            // hits on a non-exclusive line -- confirm that this is what the consumers expect.
            L1hit    = prv_hit;
            L1upgr   = L1hit && !prv_excl;
            L2hit    = sh_hit;
            c2c      = false; // will be set below for appropriate cases
            L1ev     = false; // will be set below
            L1wb     = false; // will be set below
            L2ev     = false; // will be set below
            L2wb     = false; // will be set below
            L2access = false; // will be set below

            // ----------------- now, we execute one of four cases:
            //                   1a. present in private cache, with appropriate ownership.
            //                   1b. present in private cache, but not excl (for a write)
            //                   2. not present in private cache, but in shared cache.
            //                   3. not present in private or shared cache.
            //
            // in each case, we update functional state and generate the packet DAG as we go.

            if (prv_hit && (!write || prv_excl)) // CASE 1a: present in prv cache, have excl if write
            {
                // just set modified-bit in state, then we're done (no protocol interaction)
                if (write)
                {
                    state.modified = true;
                }
            }
            else if (prv_hit && write && !prv_excl) // CASE 1b: present in prv cache, need upgr
            {
                txn      = new CmpCache_Txn();
                txn.node = node;

                // request packet
                CmpCache_Pkt req_pkt  = add_ctl_pkt(txn, node, sh_slice, false, false);
                CmpCache_Pkt done_pkt = null;

                // present in others?
                if (state.owners.others_set(node))
                {
                    done_pkt = do_inval(txn, state, req_pkt, node, addr);
                }
                else
                {
                    // not present in others, but we didn't have excl -- send empty grant
                    // (could happen if others have evicted and we are the only one left)
                    done_pkt       = add_ctl_pkt(txn, sh_slice, node, true, false);
                    done_pkt.delay = m_shdelay;
                    add_dep(req_pkt, done_pkt);
                }

                // the requester becomes the sole, exclusive, modified owner
                state.owners.reset();
                state.owners.set(node);
                state.excl     = node;
                state.modified = true;
            }
            else if (!prv_hit && sh_hit) // CASE 2: not in prv cache, but in sh cache
            {
                txn      = new CmpCache_Txn();
                txn.node = node;

                // update functional shared state
                if (!m_sh_perfect)
                {
                    m_sh.update(addr, Simulator.CurrentRound);
                }

                // request packet
                CmpCache_Pkt req_pkt  = add_ctl_pkt(txn, node, sh_slice, false, false);
                CmpCache_Pkt done_pkt = null;

                if (state.owners.any_set())   // in other caches?
                {
                    if (write)                // need to invalidate?
                    {
                        if (state.excl != -1) // someone else has exclusive -- c-to-c xfer
                        {
                            c2c = true;       // out-param

                            // slice forwards the request to the exclusive owner, which sends the data
                            CmpCache_Pkt xfer_req = add_ctl_pkt(txn, sh_slice, state.excl, false, false);
                            CmpCache_Pkt xfer_dat = add_data_pkt(txn, state.excl, node, true, false);
                            done_pkt = xfer_dat;

                            xfer_req.delay = m_shdelay;
                            xfer_dat.delay = m_prvdelay;

                            add_dep(req_pkt, xfer_req);
                            add_dep(xfer_req, xfer_dat);

                            // the previous exclusive owner loses its copy
                            bool evicted_state;
                            m_prv[state.excl].inval(addr, out evicted_state);
                        }
                        else // others have it -- inval to all, c-to-c from closest
                        {
                            int close = closest(node, state.owners);
                            if (close != -1)
                            {
                                c2c = true;              // out-param
                            }
                            done_pkt = do_inval(txn, state, req_pkt, node, addr, close);
                        }

                        // for a write, we need exclusive -- update state
                        state.owners.reset();
                        state.owners.set(node);
                        state.excl     = node;
                        state.modified = true;
                    }
                    else // just a read -- joining sharer set, c-to-c from closest
                    {
                        if (state.excl != -1)
                        {
                            // a single exclusive owner supplies the data
                            CmpCache_Pkt xfer_req = add_ctl_pkt(txn, sh_slice, state.excl, false, false);
                            CmpCache_Pkt xfer_dat = add_data_pkt(txn, state.excl, node, true, false);
                            done_pkt = xfer_dat;

                            c2c = true; // out-param

                            xfer_req.delay = m_shdelay;
                            xfer_dat.delay = m_prvdelay;

                            add_dep(req_pkt, xfer_req);
                            add_dep(xfer_req, xfer_dat);

                            // downgrade must also trigger writeback
                            if (state.modified)
                            {
                                CmpCache_Pkt wb_dat = add_data_pkt(txn, state.excl, sh_slice, false, false);
                                add_dep(xfer_req, wb_dat);
                                state.modified = false;
                                state.sh_dirty = true;
                            }
                        }
                        else
                        {
                            // several sharers: the closest one supplies the data
                            // NOTE(review): if closest() can return -1 here, the two packets below
                            // are created with endpoint -1 -- confirm that it cannot happen while
                            // state.owners.any_set() is true.
                            int close = closest(node, state.owners);
                            if (close != -1)
                            {
                                c2c = true;              // out-param
                            }
                            CmpCache_Pkt xfer_req = add_ctl_pkt(txn, sh_slice, close, false, false);
                            CmpCache_Pkt xfer_dat = add_data_pkt(txn, close, node, true, false);
                            done_pkt = xfer_dat;

                            xfer_req.delay = m_shdelay;
                            xfer_dat.delay = m_prvdelay;

                            add_dep(req_pkt, xfer_req);
                            add_dep(xfer_req, xfer_dat);
                        }

                        // requester joins the sharer set; nobody is exclusive any more
                        state.owners.set(node);
                        state.excl = -1;
                    }
                }
                else
                {
                    // not in other prv caches, need to get from shared slice
                    L2access = true;

                    CmpCache_Pkt dat_resp = add_data_pkt(txn, sh_slice, node, true, false);
                    done_pkt = dat_resp;

                    add_dep(req_pkt, done_pkt);

                    dat_resp.delay = m_shdelay;

                    // requester is the only holder, so it gets the block exclusively
                    state.owners.reset();
                    state.owners.set(node);
                    state.excl     = node;
                    state.modified = write;
                }

                // insert into private cache, get evicted block (if any)
                ulong evict_addr;
                bool  evict_data;
                bool  evicted = m_prv[node].insert(addr, true, out evict_addr, out evict_data, Simulator.CurrentRound);

                // add either a writeback or a release packet
                if (evicted)
                {
                    L1ev = true;
                    do_evict(txn, done_pkt, node, evict_addr, out L1wb);
                }
            }
            else if (!prv_hit && !sh_hit) // CASE 3: not in prv or shared cache
            {
                // here, we need to go to memory
                // NOTE(review): GPU and bypass requesters reach this case even when m_sh_perfect
                // is set, because their hits are forced to false above -- confirm whether this
                // assertion is meant to apply to them.
                Debug.Assert(!m_sh_perfect);

                txn      = new CmpCache_Txn();
                txn.node = node;

                if (bypass_caches)
                {
                    // HWA does not access private and shared caches, sends a packet to the
                    // memory controller directly
                    int mem_slice = map_addr_mem(node, addr);
                    if (write)
                    {
                        CmpCache_Pkt mem_wr = add_data_pkt(txn, node, mem_slice, false, false);
                        mem_wr.delay = 0; // Is it right??

                        // memory-access virtual node (not sent on the network)
                        CmpCache_Pkt mem_wr_op = add_ctl_pkt(txn, 0, 0, true, false);
                        mem_wr_op.send          = false;
                        mem_wr_op.mem           = true;
                        mem_wr_op.mem_addr      = addr;
                        mem_wr_op.mem_write     = true;
                        mem_wr_op.mem_requestor = node;

                        // When write, HWA does not wait for response packet from memory
                        add_dep(mem_wr, mem_wr_op);
                    }
                    else
                    {
                        CmpCache_Pkt memreq_pkt = add_ctl_pkt(txn, node, mem_slice, false, false);
                        memreq_pkt.delay = 0; // Is it right??

                        // memory-access virtual node (not sent on the network)
                        CmpCache_Pkt mem_access = add_ctl_pkt(txn, 0, 0, false, false);
                        mem_access.send          = false;
                        mem_access.mem           = true;
                        mem_access.mem_addr      = addr;
                        mem_access.mem_write     = false;
                        mem_access.mem_requestor = node;

                        // When read, HWA waits for response packet from memory
                        CmpCache_Pkt memresp_pkt = add_data_pkt(txn, mem_slice, node, true, false);
                        memresp_pkt.delay = 0;

                        add_dep(memreq_pkt, mem_access);
                        add_dep(mem_access, memresp_pkt);
                    }
                }
                else
                {
                    L2access = true;

                    // TODO: the GPU flag passed to the packets below seems to be true for GPU
                    // requests, but is reportedly not getting propagated.

                    // request packet
                    CmpCache_Pkt req_pkt = add_ctl_pkt(txn, node, sh_slice, false, is_gpu);

                    // cache response packet
                    CmpCache_Pkt resp_pkt = add_data_pkt(txn, sh_slice, node, true, is_gpu);
                    resp_pkt.delay = m_opdelay; // req already active -- just a pass-through op delay here

                    // memory request packet
                    int mem_slice = map_addr_mem(node, addr);
                    // TODO: Rachata: Check this part. This has to include the GPU tag
                    CmpCache_Pkt memreq_pkt = add_ctl_pkt(txn, sh_slice, mem_slice, false, is_gpu);
                    memreq_pkt.delay = m_shdelay;

                    // memory-access virtual node
                    CmpCache_Pkt mem_access = add_ctl_pkt(txn, 0, 0, false, is_gpu);
                    mem_access.send          = false;
                    mem_access.mem           = true;
                    mem_access.mem_addr      = addr;
                    mem_access.mem_write     = false; // cache-line fill
                    mem_access.mem_requestor = node;

                    // memory response packet
                    // TODO: Rachata: Same here: check this part. This has to include the GPU tag for the return packet
                    CmpCache_Pkt memresp_pkt = add_data_pkt(txn, mem_slice, sh_slice, false, is_gpu);

                    // connect up the critical path first
                    add_dep(req_pkt, memreq_pkt);
                    add_dep(memreq_pkt, mem_access);
                    add_dep(mem_access, memresp_pkt);
                    add_dep(memresp_pkt, resp_pkt);

                    // only non-GPU requesters allocate in the caches (and can therefore evict);
                    // for the GPU nothing more is done.  TODO: check this
                    if (!is_gpu)
                    {
                        // now, handle replacement in the shared cache...
                        CmpCache_State new_state = new CmpCache_State();

                        new_state.owners.reset();
                        new_state.owners.set(node);
                        new_state.excl     = node;
                        new_state.modified = write;
                        new_state.sh_dirty = false;

                        ulong          sh_evicted_addr;
                        CmpCache_State sh_evicted_state;
                        bool           evicted = m_sh.insert(addr, new_state, out sh_evicted_addr, out sh_evicted_state, Simulator.CurrentRound);

                        if (evicted)
                        {
                            // shared-cache eviction (different from the private-cache evictions elsewhere):
                            // we must evict any private-cache copies, because we model an inclusive hierarchy.

                            L2ev = true;

                            // join point that the memory writeback (if any) waits on
                            CmpCache_Pkt prv_evict_join = add_joinpt(txn, false);

                            if (sh_evicted_state.excl != -1) // evicted block lives only in one prv cache
                            {
                                // invalidate request to prv cache before sh cache does eviction
                                CmpCache_Pkt prv_invl = add_ctl_pkt(txn, sh_slice, sh_evicted_state.excl, false, false);
                                add_dep(memresp_pkt, prv_invl);
                                CmpCache_Pkt prv_wb;

                                prv_invl.delay = m_opdelay;

                                if (sh_evicted_state.modified)
                                {
                                    // writeback; marking the shared copy dirty triggers the
                                    // writeback to memory further down
                                    prv_wb       = add_data_pkt(txn, sh_evicted_state.excl, sh_slice, false, false);
                                    prv_wb.delay = m_prvdelay;
                                    sh_evicted_state.sh_dirty = true;
                                }
                                else
                                {
                                    // simple ACK
                                    prv_wb       = add_ctl_pkt(txn, sh_evicted_state.excl, sh_slice, false, false);
                                    prv_wb.delay = m_prvdelay;
                                }

                                add_dep(prv_invl, prv_wb);
                                add_dep(prv_wb, prv_evict_join);

                                bool prv_evicted_dat;
                                m_prv[sh_evicted_state.excl].inval(sh_evicted_addr, out prv_evicted_dat);
                            }
                            else if (sh_evicted_state.owners.any_set()) // evicted block has greater-than-one sharer set
                            {
                                // one invalidate/ack pair per sharer, all feeding the join point
                                for (int i = 0; i < m_N; i++)
                                {
                                    if (sh_evicted_state.owners.is_set(i))
                                    {
                                        CmpCache_Pkt prv_invl = add_ctl_pkt(txn, sh_slice, i, false, false);
                                        CmpCache_Pkt prv_ack  = add_ctl_pkt(txn, i, sh_slice, false, false);

                                        prv_invl.delay = m_opdelay;
                                        prv_ack.delay  = m_prvdelay;

                                        add_dep(memresp_pkt, prv_invl);
                                        add_dep(prv_invl, prv_ack);
                                        add_dep(prv_ack, prv_evict_join);

                                        bool prv_evicted_dat;
                                        m_prv[i].inval(sh_evicted_addr, out prv_evicted_dat);
                                    }
                                }
                            }
                            else // evicted block has no owners (was only in shared cache)
                            {
                                add_dep(memresp_pkt, prv_evict_join);
                            }

                            // now writeback to memory, if we were dirty
                            if (sh_evicted_state.sh_dirty)
                            {
                                CmpCache_Pkt mem_wb = add_data_pkt(txn, sh_slice, mem_slice, false, false);
                                mem_wb.delay = m_opdelay;
                                add_dep(prv_evict_join, mem_wb);

                                // memory-access virtual node for the writeback
                                CmpCache_Pkt mem_wb_op = add_ctl_pkt(txn, 0, 0, false, false);
                                mem_wb_op.send          = false;
                                mem_wb_op.mem           = true;
                                mem_wb_op.mem_addr      = sh_evicted_addr;
                                mem_wb_op.mem_write     = true;
                                mem_wb_op.mem_requestor = node;
                                add_dep(mem_wb, mem_wb_op);
                                L2wb = true;
                            }
                        }

                        // ...and insert and handle replacement in the private cache
                        ulong evict_addr;
                        bool  evict_data;
                        bool  prv_evicted = m_prv[node].insert(addr, true, out evict_addr, out evict_data, Simulator.CurrentRound);

                        // add either a writeback or a release packet
                        if (prv_evicted)
                        {
                            L1ev = true;
                            do_evict(txn, resp_pkt, node, evict_addr, out L1wb);
                        }
                    }
                }
            }
            else // shouldn't happen: the four cases above are exhaustive.
            {
                Debug.Assert(false);
            }

            // now start the transaction, if one was needed
            if (txn != null)
            {
                txn.cb = cb;

                assignVCclasses(txn.pkts);

                // start running the protocol DAG; the start itself is deferred by the
                // private-cache access delay.
                Simulator.Defer(delegate()
                {
                    start_pkts(txn);
                }, Simulator.CurrentRound + m_prvdelay);
            }
            // no transaction -- just the cache access delay. schedule deferred callback.
            else
            {
                Simulator.Defer(cb, Simulator.CurrentRound + m_prvdelay);
            }
        }