dhschall / gem5-fdp

Development repository for Fetch Directed Instruction Prefetching (FDP) in gem5
http://www.gem5.org
BSD 3-Clause "New" or "Revised" License
15 stars 1 forks source link

High host CPU memory requirement #4

Open atrah22 opened 10 months ago

atrah22 commented 10 months ago

Hello, I ran a full-system simulation of a tuned ARM CPU for a large workload (one that takes 8 hours on a supercomputer) using the code from the gem5-dev branch. However, with this commit, my job is sometimes killed, which I think is due to excessive host memory usage. If I run a small workload, it works fine. Could you identify or estimate where memory management could be improved in the implementation of FDP and/or the associative BTB?

I changed the associative BTB to simpleBTB_v2, as shown below:

namespace branch_prediction
{

/**
 * Build a direct-mapped BTB from its parameters.
 *
 * Aborts via fatal() when the configured number of entries is not a power
 * of two, because the index is extracted with a simple bit mask
 * (numEntries - 1).
 *
 * @param p Parameters providing numEntries, tagBits, instShiftAmt and
 *          numThreads.
 */
SimpleBTB::SimpleBTB(const SimpleBTBParams &p)
    : BranchTargetBuffer(p),
        numEntries(p.numEntries),
        tagBits(p.tagBits),
        instShiftAmt(p.instShiftAmt),
        log2NumThreads(floorLog2(p.numThreads))
{
    DPRINTF(BTB, "BTB: Creating BTB object.\n");

    if (!isPowerOf2(numEntries)) {
        fatal("BTB entries is not a power of 2!");
    }

    btb.resize(numEntries);

    // Start with every entry marked invalid.
    for (unsigned i = 0; i < numEntries; ++i) {
        btb[i].valid = false;
    }

    idxMask = numEntries - 1;

    // Build the mask in 64-bit unsigned arithmetic. The previous
    // `1 << tagBits` shifted a signed int, which overflows once tagBits
    // reaches 31 and is undefined for tagBits >= 32. The result is
    // unchanged for smaller tag widths. Assumes tagBits < 64.
    tagMask = (1ULL << tagBits) - 1;

    // The tag starts right above the bits consumed by the index.
    tagShiftAmt = instShiftAmt + floorLog2(numEntries);
}

/** Drop the whole BTB content by clearing the valid bit of every entry. */
void
SimpleBTB::memInvalidate()
{
    // btb is sized to numEntries in the constructor, so walking the
    // container touches exactly the same entries as an index loop.
    for (auto &entry : btb) {
        entry.valid = false;
    }
}

/**
 * Compute the BTB index for an instruction address and thread.
 *
 * The PC is shifted right by instShiftAmt, XORed with the thread ID
 * shifted left by (tagShiftAmt - instShiftAmt - log2NumThreads), and the
 * result is masked with idxMask.
 *
 * @param instPC Address of the branch instruction.
 * @param tid    Thread the lookup belongs to.
 * @return Index into the btb array.
 */
inline
unsigned
SimpleBTB::getIndex(Addr instPC, ThreadID tid)
{
    // Drop the low-order offset bits of the PC.
    const auto pc_bits = instPC >> instShiftAmt;

    // Move the thread ID towards the upper index bits before hashing.
    const auto tid_shift = tagShiftAmt - instShiftAmt - log2NumThreads;
    const auto tid_bits = tid << tid_shift;

    return (pc_bits ^ tid_bits) & idxMask;
}

/**
 * Extract the tag of an instruction address: the PC shifted right by
 * tagShiftAmt and masked with tagMask.
 *
 * @param instPC Address of the branch instruction.
 * @return Tag value to compare against a BTB entry.
 */
inline
Addr
SimpleBTB::getTag(Addr instPC)
{
    const Addr upper_bits = instPC >> tagShiftAmt;
    return upper_bits & tagMask;
}

/**
 * Find the BTB entry for a branch, if one is present.
 *
 * An entry is a hit only if it is valid, its tag matches the tag of
 * instPC, and it was installed by the same thread.
 *
 * @param instPC Address of the branch instruction.
 * @param tid    Thread the lookup belongs to.
 * @return Pointer to the matching entry, or nullptr on a miss.
 */
SimpleBTB::BTBEntry *
SimpleBTB::findEntry(Addr instPC, ThreadID tid)
{
    const unsigned slot = getIndex(instPC, tid);
    const Addr wanted_tag = getTag(instPC);

    assert(slot < numEntries);

    BTBEntry &candidate = btb[slot];

    if (!candidate.valid) {
        return nullptr;
    }
    if (!(wanted_tag == candidate.tag)) {
        return nullptr;
    }
    if (!(candidate.tid == tid)) {
        return nullptr;
    }

    return &candidate;
}

/**
 * Tell whether the BTB currently holds an entry for the given branch.
 *
 * @param tid    Thread the lookup belongs to.
 * @param instPC Address of the branch instruction.
 * @return true if findEntry() reports a hit, false otherwise.
 */
bool
SimpleBTB::valid(ThreadID tid, Addr instPC)
{
    return findEntry(instPC, tid) != nullptr;
}

// Looks up the target stored for a branch. A hit returns the stored target;
// a miss returns nullptr. Every call is counted in stats.lookups[type], and
// misses are additionally counted in stats.misses[type].
// @todo Create some sort of return struct that carries both whether the
// entry is valid and the target itself.
const PCStateBase *
SimpleBTB::lookup(ThreadID tid, Addr instPC, BranchType type)
{
    stats.lookups[type]++;

    BTBEntry *hit = findEntry(instPC, tid);

    if (!hit) {
        stats.misses[type]++;
        return nullptr;
    }

    return hit->target.get();
}

/**
 * Return the static instruction stored for a branch.
 *
 * Unlike lookup(), this does not touch the lookup/miss statistics.
 *
 * @param tid    Thread the lookup belongs to.
 * @param instPC Address of the branch instruction.
 * @return The instruction saved in the matching entry, or nullptr on a miss.
 */
const StaticInstPtr
SimpleBTB::lookupInst(ThreadID tid, Addr instPC)
{
    BTBEntry *hit = findEntry(instPC, tid);

    if (!hit) {
        return nullptr;
    }

    return hit->inst;
}

/**
 * Install (or overwrite) the BTB entry for a branch.
 *
 * The entry selected by getIndex() is overwritten unconditionally; there
 * is no check whether it previously belonged to another branch.
 *
 * @param tid    Thread that executed the branch.
 * @param instPC Address of the branch instruction.
 * @param target Target PC state to record for the branch.
 * @param type   Branch type, used to bucket the update statistic.
 * @param inst   Static instruction to store alongside the target.
 */
void
SimpleBTB::update(ThreadID tid, Addr instPC,
                    const PCStateBase &target,
                    BranchType type, StaticInstPtr inst)
{
    const unsigned slot = getIndex(instPC, tid);

    assert(slot < numEntries);

    stats.updates[type]++;

    BTBEntry &entry = btb[slot];

    entry.tid = tid;
    entry.valid = true;
    set(entry.target, target);
    entry.tag = getTag(instPC);
    entry.inst = inst;
}

} // namespace branch_prediction
} // namespace gem5
Prithvi-Velicheti commented 3 months ago

I am also encountering a similar issue on X86: the processes are getting killed.

Sp1c4 commented 3 months ago

@Prithvi-Velicheti maybe check this #1

Prithvi-Velicheti commented 3 months ago

Thanks @Sp1c4